update soundtouch library sources

2013-04-15 16:05:18 +00:00 · 2013-04-15 16:05:18 +00:00 · 982cc77111
parent 734ed9a271
commit 982cc77111
17 changed files with 1399 additions and 1320 deletions
--- a/desmume/src/metaspu/SoundTouch/BPMDetect.h
+++ b/desmume/src/metaspu/SoundTouch/BPMDetect.h
@ -26,10 +26,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.5 $
+// Last changed  : $Date: 2012-08-30 16:53:44 -0300 (qui, 30 ago 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: BPMDetect.h,v 1.5 2006/02/05 16:44:06 Olli Exp $
+// $Id: BPMDetect.h 150 2012-08-30 19:53:44Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -60,11 +60,14 @@
 #include "STTypes.h"
 #include "FIFOSampleBuffer.h"

+namespace soundtouch
+{
+
 /// Minimum allowed BPM rate. Used to restrict accepted result above a reasonable limit.
-#define MIN_BPM 45
+#define MIN_BPM 29

 /// Maximum allowed BPM rate. Used to restrict accepted result below a reasonable limit.
-#define MAX_BPM 230
+#define MAX_BPM 200


 /// Class for calculating BPM rate for audio data.
@ -75,10 +78,10 @@ protected:
    float *xcorr;
    
    /// Amplitude envelope sliding average approximation level accumulator
-    float envelopeAccu;
+    double envelopeAccu;

    /// RMS volume sliding average approximation level accumulator
-    float RMSVolumeAccu;
+    double RMSVolumeAccu;

    /// Sample average counter.
    int decimateCount;
@ -105,9 +108,6 @@ protected:
    /// FIFO-buffer for decimated processing samples.
    soundtouch::FIFOSampleBuffer *buffer;

-    /// Initialize the class for processing.
-    void init(int numChannels, int sampleRate);
-
    /// Updates auto-correlation function for given number of decimated samples that 
    /// are read from the internal 'buffer' pipe (samples aren't removed from the pipe 
    /// though).
@ -128,6 +128,9 @@ protected:
                      int numsamples                    ///< Number of samples in buffer
                      );

+    /// remove constant bias from xcorr data
+    void removeBias();
+
 public:
    /// Constructor.
    BPMDetect(int numChannels,  ///< Number of channels in sample data.
@ -143,8 +146,8 @@ public:
    /// function. 
    /// 
    /// Notice that data in 'samples' array can be disrupted in processing.
-    void inputSamples(soundtouch::SAMPLETYPE *samples,  ///< Pointer to input/working data buffer
-                      int numSamples                    ///< Number of samples in buffer
+    void inputSamples(const soundtouch::SAMPLETYPE *samples,    ///< Pointer to input/working data buffer
+                      int numSamples                            ///< Number of samples in buffer
                      );


@ -156,4 +159,6 @@ public:
    float getBpm();
 };

+}
+
 #endif // _BPMDetect_H_
--- a/desmume/src/metaspu/SoundTouch/FIFOSampleBuffer.cpp
+++ b/desmume/src/metaspu/SoundTouch/FIFOSampleBuffer.cpp
@ -15,10 +15,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.11 $
+// Last changed  : $Date: 2012-11-08 16:53:01 -0200 (qui, 08 nov 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: FIFOSampleBuffer.cpp,v 1.11 2006/02/05 16:44:06 Olli Exp $
+// $Id: FIFOSampleBuffer.cpp 160 2012-11-08 18:53:01Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -47,21 +47,22 @@
 #include <memory.h>
 #include <string.h>
 #include <assert.h>
-#include <stdexcept>

 #include "FIFOSampleBuffer.h"

 using namespace soundtouch;

 // Constructor
-FIFOSampleBuffer::FIFOSampleBuffer(uint numChannels)
+FIFOSampleBuffer::FIFOSampleBuffer(int numChannels)
 {
+    assert(numChannels > 0);
    sizeInBytes = 0; // reasonable initial value
-    buffer = NULL;  //new SAMPLETYPE[sizeInBytes / sizeof(SAMPLETYPE)];
+    buffer = NULL;
    bufferUnaligned = NULL;
    samplesInBuffer = 0;
    bufferPos = 0;
-    channels = numChannels;
+    channels = (uint)numChannels;
+    ensureCapacity(32);     // allocate initial capacity 
 }


@ -69,16 +70,19 @@ FIFOSampleBuffer::FIFOSampleBuffer(uint numChannels)
 FIFOSampleBuffer::~FIFOSampleBuffer()
 {
    delete[] bufferUnaligned;
+    bufferUnaligned = NULL;
+    buffer = NULL;
 }


 // Sets number of channels, 1 = mono, 2 = stereo
-void FIFOSampleBuffer::setChannels(const uint numChannels)
+void FIFOSampleBuffer::setChannels(int numChannels)
 {
    uint usedBytes;

+    assert(numChannels > 0);
    usedBytes = channels * samplesInBuffer;
-    channels = numChannels;
+    channels = (uint)numChannels;
    samplesInBuffer = usedBytes / channels;
 }

@ -88,7 +92,7 @@ void FIFOSampleBuffer::setChannels(const uint numChannels)
 // location on to the beginning of the buffer.
 void FIFOSampleBuffer::rewind()
 {
-    if (bufferPos) 
+    if (buffer && bufferPos) 
    {
        memmove(buffer, ptrBegin(), sizeof(SAMPLETYPE) * channels * samplesInBuffer);
        bufferPos = 0;
@ -98,10 +102,10 @@ void FIFOSampleBuffer::rewind()

 // Adds 'numSamples' pcs of samples from the 'samples' memory position to 
 // the sample buffer.
-void FIFOSampleBuffer::putSamples(const SAMPLETYPE *samples, uint numSamples)
+void FIFOSampleBuffer::putSamples(const SAMPLETYPE *samples, uint nSamples)
 {
-    memcpy(ptrEnd(numSamples), samples, sizeof(SAMPLETYPE) * numSamples * channels);
-    samplesInBuffer += numSamples;
+    memcpy(ptrEnd(nSamples), samples, sizeof(SAMPLETYPE) * nSamples * channels);
+    samplesInBuffer += nSamples;
 }


@ -111,13 +115,13 @@ void FIFOSampleBuffer::putSamples(const SAMPLETYPE *samples, uint numSamples)
 // This function is used to update the number of samples in the sample buffer
 // when accessing the buffer directly with 'ptrEnd' function. Please be 
 // careful though!
-void FIFOSampleBuffer::putSamples(uint numSamples)
+void FIFOSampleBuffer::putSamples(uint nSamples)
 {
    uint req;

-    req = samplesInBuffer + numSamples;
+    req = samplesInBuffer + nSamples;
    ensureCapacity(req);
-    samplesInBuffer += numSamples;
+    samplesInBuffer += nSamples;
 }


@ -147,8 +151,9 @@ SAMPLETYPE *FIFOSampleBuffer::ptrEnd(uint slackCapacity)
 // When using this function to output samples, also remember to 'remove' the
 // outputted samples from the buffer by calling the 
 // 'receiveSamples(numSamples)' function
-SAMPLETYPE *FIFOSampleBuffer::ptrBegin() const
+SAMPLETYPE *FIFOSampleBuffer::ptrBegin()
 {
+    assert(buffer);
    return buffer + bufferPos * channels;
 }

@ -164,15 +169,19 @@ void FIFOSampleBuffer::ensureCapacity(uint capacityRequirement)
    if (capacityRequirement > getCapacity()) 
    {
        // enlarge the buffer in 4kbyte steps (round up to next 4k boundary)
-        sizeInBytes = (capacityRequirement * channels * sizeof(SAMPLETYPE) + 4095) & -4096;
+        sizeInBytes = (capacityRequirement * channels * sizeof(SAMPLETYPE) + 4095) & (uint)-4096;
        assert(sizeInBytes % 2 == 0);
        tempUnaligned = new SAMPLETYPE[sizeInBytes / sizeof(SAMPLETYPE) + 16 / sizeof(SAMPLETYPE)];
        if (tempUnaligned == NULL)
        {
-            throw std::runtime_error("Couldn't allocate memory!\n");
+            ST_THROW_RT_ERROR("Couldn't allocate memory!\n");
+        }
+        // Align the buffer to begin at 16byte cache line boundary for optimal performance
+        temp = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(tempUnaligned);
+        if (samplesInBuffer)
+        {
+            memcpy(temp, ptrBegin(), samplesInBuffer * channels * sizeof(SAMPLETYPE));
        }
-        temp = (SAMPLETYPE *)(((ulongptr)tempUnaligned + 15) & -16);
-        memcpy(temp, ptrBegin(), samplesInBuffer * channels * sizeof(SAMPLETYPE));
        delete[] bufferUnaligned;
        buffer = temp;
        bufferUnaligned = tempUnaligned;
@ -250,3 +259,16 @@ void FIFOSampleBuffer::clear()
    samplesInBuffer = 0;
    bufferPos = 0;
 }
+
+
+/// Allows trimming (downwards) the amount of samples in the pipeline.
+/// Returns adjusted amount of samples
+uint FIFOSampleBuffer::adjustAmountOfSamples(uint numSamples)
+{
+    if (numSamples < samplesInBuffer)
+    {
+        samplesInBuffer = numSamples;
+    }
+    return samplesInBuffer;
+}
+
--- a/desmume/src/metaspu/SoundTouch/FIFOSampleBuffer.h
+++ b/desmume/src/metaspu/SoundTouch/FIFOSampleBuffer.h
@ -15,10 +15,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.9 $
+// Last changed  : $Date: 2012-06-13 16:29:53 -0300 (qua, 13 jun 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: FIFOSampleBuffer.h,v 1.9 2006/02/05 16:44:06 Olli Exp $
+// $Id: FIFOSampleBuffer.h 143 2012-06-13 19:29:53Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -85,15 +85,15 @@ private:
    void rewind();

    /// Ensures that the buffer has capacity for at least this many samples.
-    void ensureCapacity(const uint capacityRequirement);
+    void ensureCapacity(uint capacityRequirement);

    /// Returns current capacity.
    uint getCapacity() const;
- 
+
 public:

    /// Constructor
-    FIFOSampleBuffer(uint numChannels = 2     ///< Number of channels, 1=mono, 2=stereo.
+    FIFOSampleBuffer(int numChannels = 2     ///< Number of channels, 1=mono, 2=stereo.
                                              ///< Default is stereo.
                     );

@ -107,7 +107,7 @@ public:
    /// When using this function to output samples, also remember to 'remove' the
    /// output samples from the buffer by calling the 
    /// 'receiveSamples(numSamples)' function
-    virtual SAMPLETYPE *ptrBegin() const;
+    virtual SAMPLETYPE *ptrBegin();

    /// Returns a pointer to the end of the used part of the sample buffer (i.e. 
    /// where the new samples are to be inserted). This function may be used for 
@ -160,13 +160,17 @@ public:
    virtual uint numSamples() const;

    /// Sets number of channels, 1 = mono, 2 = stereo.
-    void setChannels(uint numChannels);
+    void setChannels(int numChannels);

    /// Returns nonzero if there aren't any samples available for outputting.
    virtual int isEmpty() const;

    /// Clears all the samples.
    virtual void clear();
+
+    /// Allows trimming (downwards) the amount of samples in the pipeline.
+    /// Returns adjusted amount of samples
+    uint adjustAmountOfSamples(uint numSamples);
 };

 }
--- a/desmume/src/metaspu/SoundTouch/FIFOSamplePipe.h
+++ b/desmume/src/metaspu/SoundTouch/FIFOSamplePipe.h
@ -17,10 +17,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.8 $
+// Last changed  : $Date: 2012-06-13 16:29:53 -0300 (qua, 13 jun 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: FIFOSamplePipe.h,v 1.8 2006/02/05 16:44:06 Olli Exp $
+// $Id: FIFOSamplePipe.h 143 2012-06-13 19:29:53Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -59,6 +59,10 @@ namespace soundtouch
 class FIFOSamplePipe
 {
 public:
+    // virtual default destructor
+    virtual ~FIFOSamplePipe() {}
+
+
    /// Returns a pointer to the beginning of the output samples. 
    /// This function is provided for accessing the output samples directly. 
    /// Please be careful for not to corrupt the book-keeping!
@ -66,12 +70,12 @@ public:
    /// When using this function to output samples, also remember to 'remove' the
    /// output samples from the buffer by calling the 
    /// 'receiveSamples(numSamples)' function
-    virtual SAMPLETYPE *ptrBegin() const = 0;
+    virtual SAMPLETYPE *ptrBegin() = 0;

    /// Adds 'numSamples' pcs of samples from the 'samples' memory position to
    /// the sample buffer.
    virtual void putSamples(const SAMPLETYPE *samples,  ///< Pointer to samples.
-                            uint numSamples                         ///< Number of samples to insert.
+                            uint numSamples             ///< Number of samples to insert.
                            ) = 0;


@ -110,6 +114,11 @@ public:

    /// Clears all the samples.
    virtual void clear() = 0;
+
+    /// Allows trimming (downwards) the amount of samples in the pipeline.
+    /// Returns adjusted amount of samples
+    virtual uint adjustAmountOfSamples(uint numSamples) = 0;
+
 };


@ -166,7 +175,7 @@ protected:
    /// When using this function to output samples, also remember to 'remove' the
    /// output samples from the buffer by calling the 
    /// 'receiveSamples(numSamples)' function
-    virtual SAMPLETYPE *ptrBegin() const
+    virtual SAMPLETYPE *ptrBegin()
    {
        return output->ptrBegin();
    }
@ -210,6 +219,14 @@ public:
    {
        return output->isEmpty();
    }
+
+    /// Allows trimming (downwards) the amount of samples in the pipeline.
+    /// Returns adjusted amount of samples
+    virtual uint adjustAmountOfSamples(uint numSamples)
+    {
+        return output->adjustAmountOfSamples(numSamples);
+    }
+
 };

 }
--- a/desmume/src/metaspu/SoundTouch/FIRFilter.cpp
+++ b/desmume/src/metaspu/SoundTouch/FIRFilter.cpp
@ -11,10 +11,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.16 $
+// Last changed  : $Date: 2011-09-02 15:56:11 -0300 (sex, 02 set 2011) $
+// File revision : $Revision: 4 $
 //
-// $Id: FIRFilter.cpp,v 1.16 2006/02/05 16:44:06 Olli Exp $
+// $Id: FIRFilter.cpp 131 2011-09-02 18:56:11Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -43,7 +43,6 @@
 #include <assert.h>
 #include <math.h>
 #include <stdlib.h>
-#include <stdexcept>
 #include "FIRFilter.h"
 #include "cpu_detect.h"

@ -58,6 +57,7 @@ using namespace soundtouch;
 FIRFilter::FIRFilter()
 {
    resultDivFactor = 0;
+    resultDivider = 0;
    length = 0;
    lengthDiv8 = 0;
    filterCoeffs = NULL;
@ -74,13 +74,16 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
 {
    uint i, j, end;
    LONG_SAMPLETYPE suml, sumr;
-#ifdef FLOAT_SAMPLES
+#ifdef SOUNDTOUCH_FLOAT_SAMPLES
    // when using floating point samples, use a scaler instead of a divider
    // because division is much slower operation than multiplying.
    double dScaler = 1.0 / (double)resultDivider;
 #endif

    assert(length != 0);
+    assert(src != NULL);
+    assert(dest != NULL);
+    assert(filterCoeffs != NULL);

    end = 2 * (numSamples - length);

@ -104,7 +107,7 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
                    ptr[2 * i + 7] * filterCoeffs[i + 3];
        }

-#ifdef INTEGER_SAMPLES
+#ifdef SOUNDTOUCH_INTEGER_SAMPLES
        suml >>= resultDivFactor;
        sumr >>= resultDivFactor;
        // saturate to 16 bit integer limits
@ -114,7 +117,7 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
 #else
        suml *= dScaler;
        sumr *= dScaler;
-#endif // INTEGER_SAMPLES
+#endif // SOUNDTOUCH_INTEGER_SAMPLES
        dest[j] = (SAMPLETYPE)suml;
        dest[j + 1] = (SAMPLETYPE)sumr;
    }
@ -129,7 +132,7 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
 {
    uint i, j, end;
    LONG_SAMPLETYPE sum;
-#ifdef FLOAT_SAMPLES
+#ifdef SOUNDTOUCH_FLOAT_SAMPLES
    // when using floating point samples, use a scaler instead of a divider
    // because division is much slower operation than multiplying.
    double dScaler = 1.0 / (double)resultDivider;
@ -150,13 +153,13 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
                   src[i + 2] * filterCoeffs[i + 2] + 
                   src[i + 3] * filterCoeffs[i + 3];
        }
-#ifdef INTEGER_SAMPLES
+#ifdef SOUNDTOUCH_INTEGER_SAMPLES
        sum >>= resultDivFactor;
        // saturate to 16 bit integer limits
        sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
 #else
        sum *= dScaler;
-#endif // INTEGER_SAMPLES
+#endif // SOUNDTOUCH_INTEGER_SAMPLES
        dest[j] = (SAMPLETYPE)sum;
        src ++;
    }
@ -170,18 +173,14 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
 void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint uResultDivFactor)
 {
    assert(newLength > 0);
-    if (newLength % 8) throw std::runtime_error("FIR filter length not divisible by 8");
+    if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");

    lengthDiv8 = newLength / 8;
    length = lengthDiv8 * 8;
    assert(length == newLength);

    resultDivFactor = uResultDivFactor;
-#ifdef INTEGER_SAMPLES
-    resultDivider = (SAMPLETYPE)(1<<resultDivFactor);
-#else
-    resultDivider = (SAMPLETYPE)powf(2, (SAMPLETYPE)resultDivFactor);
-#endif
+    resultDivider = (SAMPLETYPE)::pow(2.0, (int)resultDivFactor);

    delete[] filterCoeffs;
    filterCoeffs = new SAMPLETYPE[length];
@ -207,7 +206,6 @@ uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSample
    assert(length > 0);
    assert(lengthDiv8 * 8 == length);
    if (numSamples < length) return 0;
-    assert(resultDivFactor >= 0);
    if (numChannels == 2) 
    {
        return evaluateFilterStereo(dest, src, numSamples);
@ -223,46 +221,36 @@ uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSample
 void * FIRFilter::operator new(size_t s)
 {
    // Notice! don't use "new FIRFilter" directly, use "newInstance" to create a new instance instead!
-    throw std::runtime_error("Don't use 'new FIRFilter', use 'newInstance' member instead!");
-    return NULL;
+    ST_THROW_RT_ERROR("Error in FIRFilter::new: Don't use 'new FIRFilter', use 'newInstance' member instead!");
+    return newInstance();
 }


 FIRFilter * FIRFilter::newInstance()
 {
-    uint uExtensions = 0;
+    uint uExtensions;

-#if !defined(_MSC_VER) || !defined(__x86_64__)
    uExtensions = detectCPUextensions();
-#endif
-    // Check if MMX/SSE/3DNow! instruction set extensions supported by CPU

-#ifdef ALLOW_MMX
+    // Check if MMX/SSE instruction set extensions supported by CPU
+
+#ifdef SOUNDTOUCH_ALLOW_MMX
    // MMX routines available only with integer sample types
    if (uExtensions & SUPPORT_MMX)
    {
        return ::new FIRFilterMMX;
    }
    else
-#endif // ALLOW_MMX
+#endif // SOUNDTOUCH_ALLOW_MMX

-#ifdef ALLOW_SSE
+#ifdef SOUNDTOUCH_ALLOW_SSE
    if (uExtensions & SUPPORT_SSE)
    {
        // SSE support
        return ::new FIRFilterSSE;
    }
    else
-#endif // ALLOW_SSE
-
-#ifdef ALLOW_3DNOW
-    if (uExtensions & SUPPORT_3DNOW)
-    {
-        // 3DNow! support
-        return ::new FIRFilter3DNow;
-    }
-    else
-#endif // ALLOW_3DNOW
+#endif // SOUNDTOUCH_ALLOW_SSE

    {
        // ISA optimizations not supported, use plain C version
--- a/desmume/src/metaspu/SoundTouch/FIRFilter.h
+++ b/desmume/src/metaspu/SoundTouch/FIRFilter.h
@ -11,10 +11,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.17 $
+// Last changed  : $Date: 2011-02-13 17:13:57 -0200 (dom, 13 fev 2011) $
+// File revision : $Revision: 4 $
 //
-// $Id: FIRFilter.h,v 1.17 2006/02/05 16:44:06 Olli Exp $
+// $Id: FIRFilter.h 104 2011-02-13 19:13:57Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -42,6 +42,7 @@
 #ifndef FIRFilter_H
 #define FIRFilter_H

+#include <stddef.h>
 #include "STTypes.h"

 namespace soundtouch
@ -77,7 +78,7 @@ public:

    /// Operator 'new' is overloaded so that it automatically creates a suitable instance 
    /// depending on if we've a MMX-capable CPU available or not.
-    void * operator new(size_t s);
+    static void * operator new(size_t s);

    static FIRFilter *newInstance();

@ -101,9 +102,9 @@ public:

 // Optional subclasses that implement CPU-specific optimizations:

-#ifdef ALLOW_MMX
+#ifdef SOUNDTOUCH_ALLOW_MMX

-    /// Class that implements MMX optimized functions exclusive for 16bit integer samples type.
+/// Class that implements MMX optimized functions exclusive for 16bit integer samples type.
    class FIRFilterMMX : public FIRFilter
    {
    protected:
@ -118,29 +119,10 @@ public:
        virtual void setCoefficients(const short *coeffs, uint newLength, uint uResultDivFactor);
    };

-#endif // ALLOW_MMX
+#endif // SOUNDTOUCH_ALLOW_MMX


-#ifdef ALLOW_3DNOW
-
-    /// Class that implements 3DNow! optimized functions exclusive for floating point samples type.
-    class FIRFilter3DNow : public FIRFilter
-    {
-    protected:
-        float *filterCoeffsUnalign;
-        float *filterCoeffsAlign;
-
-        virtual uint evaluateFilterStereo(float *dest, const float *src, uint numSamples) const;
-    public:
-        FIRFilter3DNow();
-        ~FIRFilter3DNow();
-        virtual void setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor);
-    };
-
-#endif  // ALLOW_3DNOW
-
-
-#ifdef ALLOW_SSE
+#ifdef SOUNDTOUCH_ALLOW_SSE
    /// Class that implements SSE optimized functions exclusive for floating point samples type.
    class FIRFilterSSE : public FIRFilter
    {
@ -156,7 +138,7 @@ public:
        virtual void setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor);
    };

-#endif // ALLOW_SSE
+#endif // SOUNDTOUCH_ALLOW_SSE

 }

--- a/desmume/src/metaspu/SoundTouch/RateTransposer.cpp
+++ b/desmume/src/metaspu/SoundTouch/RateTransposer.cpp
@ -10,10 +10,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/03/19 10:05:49 $
-// File revision : $Revision: 1.13 $
+// Last changed  : $Date: 2011-09-02 15:56:11 -0300 (sex, 02 set 2011) $
+// File revision : $Revision: 4 $
 //
-// $Id: RateTransposer.cpp,v 1.13 2006/03/19 10:05:49 Olli Exp $
+// $Id: RateTransposer.cpp 131 2011-09-02 18:56:11Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -42,7 +42,6 @@
 #include <assert.h>
 #include <stdlib.h>
 #include <stdio.h>
-#include <limits.h>
 #include "RateTransposer.h"
 #include "AAFilter.h"

@ -55,7 +54,7 @@ class RateTransposerInteger : public RateTransposer
 {
 protected:
    int iSlopeCount;
-    uint uRate;
+    int iRate;
    SAMPLETYPE sPrevSampleL, sPrevSampleR;

    virtual void resetRegisters();
@ -84,7 +83,6 @@ class RateTransposerFloat : public RateTransposer
 {
 protected:
    float fSlopeCount;
-    float fRateStep;
    SAMPLETYPE sPrevSampleL, sPrevSampleR;

    virtual void resetRegisters();
@ -103,25 +101,19 @@ public:



-#ifndef min
-#define min(a,b) ((a > b) ? b : a)
-#define max(a,b) ((a < b) ? b : a)
-#endif
-

 // Operator 'new' is overloaded so that it automatically creates a suitable instance 
 // depending on if we've a MMX/SSE/etc-capable CPU available or not.
 void * RateTransposer::operator new(size_t s)
 {
-    // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
-    assert(FALSE);  
-    return NULL;
+    ST_THROW_RT_ERROR("Error in RateTransoser::new: don't use \"new TDStretch\" directly, use \"newInstance\" to create a new instance instead!");
+    return newInstance();
 }


 RateTransposer *RateTransposer::newInstance()
 {
-#ifdef INTEGER_SAMPLES
+#ifdef SOUNDTOUCH_INTEGER_SAMPLES
    return ::new RateTransposerInteger;
 #else
    return ::new RateTransposerFloat;
@ -132,8 +124,9 @@ RateTransposer *RateTransposer::newInstance()
 // Constructor
 RateTransposer::RateTransposer() : FIFOProcessor(&outputBuffer)
 {
-    uChannels = 2;
+    numChannels = 2;
    bUseAAFilter = TRUE;
+    fRate = 0;

    // Instantiates the anti-alias filter with default tap length
    // of 32
@ -150,7 +143,7 @@ RateTransposer::~RateTransposer()


 /// Enables/disables the anti-alias filter. Zero to disable, nonzero to enable
-void RateTransposer::enableAAFilter(const BOOL newMode)
+void RateTransposer::enableAAFilter(BOOL newMode)
 {
    bUseAAFilter = newMode;
 }
@ -163,18 +156,18 @@ BOOL RateTransposer::isAAFilterEnabled() const
 }


-AAFilter *RateTransposer::getAAFilter() const
+AAFilter *RateTransposer::getAAFilter()
 {
    return pAAFilter;
 }



-// Sets new target uRate. Normal uRate = 1.0, smaller values represent slower 
-// uRate, larger faster uRates.
+// Sets new target rate. Normal rate = 1.0, smaller values represent slower 
+// rate, larger values faster rates.
 void RateTransposer::setRate(float newRate)
 {
-    float fCutoff;
+    double fCutoff;

    fRate = newRate;

@ -197,45 +190,47 @@ void RateTransposer::setRate(float newRate)
 //
 // It's allowed for 'output' and 'input' parameters to point to the same
 // memory position.
+/*
 void RateTransposer::flushStoreBuffer()
 {
    if (storeBuffer.isEmpty()) return;

    outputBuffer.moveSamples(storeBuffer);
 }
+*/


-// Adds 'numSamples' pcs of samples from the 'samples' memory position into
+// Adds 'nSamples' pcs of samples from the 'samples' memory position into
 // the input of the object.
-void RateTransposer::putSamples(const SAMPLETYPE *samples, uint numSamples)
+void RateTransposer::putSamples(const SAMPLETYPE *samples, uint nSamples)
 {
-    processSamples(samples, numSamples);
+    processSamples(samples, nSamples);
 }



 // Transposes up the sample rate, causing the observed playback 'rate' of the
 // sound to decrease
-void RateTransposer::upsample(const SAMPLETYPE *src, uint numSamples)
+void RateTransposer::upsample(const SAMPLETYPE *src, uint nSamples)
 {
-    int count, sizeTemp, num;
+    uint count, sizeTemp, num;

    // If the parameter 'uRate' value is smaller than 'SCALE', first transpose
    // the samples and then apply the anti-alias filter to remove aliasing.

    // First check that there's enough room in 'storeBuffer' 
    // (+16 is to reserve some slack in the destination buffer)
-    sizeTemp = (int)((float)numSamples / fRate + 16.0f);
+    sizeTemp = (uint)((float)nSamples / fRate + 16.0f);

    // Transpose the samples, store the result into the end of "storeBuffer"
-    count = transpose(storeBuffer.ptrEnd(sizeTemp), src, numSamples);
+    count = transpose(storeBuffer.ptrEnd(sizeTemp), src, nSamples);
    storeBuffer.putSamples(count);

    // Apply the anti-alias filter to samples in "store output", output the
    // result to "dest"
    num = storeBuffer.numSamples();
    count = pAAFilter->evaluate(outputBuffer.ptrEnd(num), 
-        storeBuffer.ptrBegin(), num, uChannels);
+        storeBuffer.ptrBegin(), num, (uint)numChannels);
    outputBuffer.putSamples(count);

    // Remove the processed samples from "storeBuffer"
@ -245,16 +240,16 @@ void RateTransposer::upsample(const SAMPLETYPE *src, uint numSamples)

 // Transposes down the sample rate, causing the observed playback 'rate' of the
 // sound to increase
-void RateTransposer::downsample(const SAMPLETYPE *src, uint numSamples)
+void RateTransposer::downsample(const SAMPLETYPE *src, uint nSamples)
 {
-    int count, sizeTemp;
+    uint count, sizeTemp;

    // If the parameter 'uRate' value is larger than 'SCALE', first apply the
    // anti-alias filter to remove high frequencies (prevent them from folding
-    // over the lover frequencies), then transpose. */
+    // over the lower frequencies), then transpose.

-    // Add the new samples to the end of the storeBuffer */
-    storeBuffer.putSamples(src, numSamples);
+    // Add the new samples to the end of the storeBuffer
+    storeBuffer.putSamples(src, nSamples);

    // Anti-alias filter the samples to prevent folding and output the filtered 
    // data to tempBuffer. Note : because of the FIR filter length, the
@ -263,13 +258,15 @@ void RateTransposer::downsample(const SAMPLETYPE *src, uint numSamples)
    sizeTemp = storeBuffer.numSamples();

    count = pAAFilter->evaluate(tempBuffer.ptrEnd(sizeTemp), 
-        storeBuffer.ptrBegin(), sizeTemp, uChannels);
+        storeBuffer.ptrBegin(), sizeTemp, (uint)numChannels);
+
+	if (count == 0) return;

    // Remove the filtered samples from 'storeBuffer'
    storeBuffer.receiveSamples(count);

    // Transpose the samples (+16 is to reserve some slack in the destination buffer)
-    sizeTemp = (int)((float)numSamples / fRate + 16.0f);
+    sizeTemp = (uint)((float)nSamples / fRate + 16.0f);
    count = transpose(outputBuffer.ptrEnd(sizeTemp), tempBuffer.ptrBegin(), count);
    outputBuffer.putSamples(count);
 }
@ -279,20 +276,20 @@ void RateTransposer::downsample(const SAMPLETYPE *src, uint numSamples)
 // Returns amount of samples returned in the "dest" buffer.
 // The maximum amount of samples that can be returned at a time is set by
 // the 'set_returnBuffer_size' function.
-void RateTransposer::processSamples(const SAMPLETYPE *src, uint numSamples)
+void RateTransposer::processSamples(const SAMPLETYPE *src, uint nSamples)
 {
    uint count;
    uint sizeReq;

-    if (numSamples == 0) return;
+    if (nSamples == 0) return;
    assert(pAAFilter);

    // If anti-alias filter is turned off, simply transpose without applying
    // the filter
    if (bUseAAFilter == FALSE) 
    {
-        sizeReq = (int)((float)numSamples / fRate + 1.0f);
-        count = transpose(outputBuffer.ptrEnd(sizeReq), src, numSamples);
+        sizeReq = (uint)((float)nSamples / fRate + 1.0f);
+        count = transpose(outputBuffer.ptrEnd(sizeReq), src, nSamples);
        outputBuffer.putSamples(count);
        return;
    }
@ -300,41 +297,42 @@ void RateTransposer::processSamples(const SAMPLETYPE *src, uint numSamples)
    // Transpose with anti-alias filter
    if (fRate < 1.0f) 
    {
-        upsample(src, numSamples);
+        upsample(src, nSamples);
    } 
    else  
    {
-        downsample(src, numSamples);
+        downsample(src, nSamples);
    }
 }


 // Transposes the sample rate of the given samples using linear interpolation. 
 // Returns the number of samples returned in the "dest" buffer
-inline uint RateTransposer::transpose(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples)
+inline uint RateTransposer::transpose(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
 {
-    if (uChannels == 2) 
+    if (numChannels == 2) 
    {
-        return transposeStereo(dest, src, numSamples);
+        return transposeStereo(dest, src, nSamples);
    } 
    else 
    {
-        return transposeMono(dest, src, numSamples);
+        return transposeMono(dest, src, nSamples);
    }
 }


 // Sets the number of channels, 1 = mono, 2 = stereo
-void RateTransposer::setChannels(const uint numchannels)
+void RateTransposer::setChannels(int nChannels)
 {
-    if (uChannels == numchannels) return;
+    assert(nChannels > 0);
+    if (numChannels == nChannels) return;

-    assert(numchannels == 1 || numchannels == 2);
-    uChannels = numchannels;
+    assert(nChannels == 1 || nChannels == 2);
+    numChannels = nChannels;

-    storeBuffer.setChannels(uChannels);
-    tempBuffer.setChannels(uChannels);
-    outputBuffer.setChannels(uChannels);
+    storeBuffer.setChannels(numChannels);
+    tempBuffer.setChannels(numChannels);
+    outputBuffer.setChannels(numChannels);

    // Inits the linear interpolation registers
    resetRegisters();
@ -350,7 +348,7 @@ void RateTransposer::clear()


 // Returns nonzero if there aren't any samples available for outputting.
-uint RateTransposer::isEmpty()
+int RateTransposer::isEmpty() const
 {
    int res;

@ -371,11 +369,10 @@ uint RateTransposer::isEmpty()
 // Constructor
 RateTransposerInteger::RateTransposerInteger() : RateTransposer()
 {
-    // call these here as these are virtual functions; calling these
-    // from the base class constructor wouldn't execute the overloaded
-    // versions (<master yoda>peculiar C++ can be</my>).
-    resetRegisters();
-    setRate(1.0f);
+    // Notice: use explicitly qualified function calls for sake of clarity, 
+    // to indicate that a C++ constructor never dispatches virtual calls to derived-class overrides.
+    RateTransposerInteger::resetRegisters();
+    RateTransposerInteger::setRate(1.0f);
 }


@ -396,12 +393,14 @@ void RateTransposerInteger::resetRegisters()
 // Transposes the sample rate of the given samples using linear interpolation. 
 // 'Mono' version of the routine. Returns the number of samples returned in 
 // the "dest" buffer
-uint RateTransposerInteger::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples)
+uint RateTransposerInteger::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
 {
    unsigned int i, used;
    LONG_SAMPLETYPE temp, vol1;

-    used = 0;    
+    if (nSamples == 0) return 0;  // no samples, no work
+
+	used = 0;    
    i = 0;

    // Process the last sample saved from the previous call first...
@ -411,7 +410,7 @@ uint RateTransposerInteger::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *sr
        temp = vol1 * sPrevSampleL + iSlopeCount * src[0];
        dest[i] = (SAMPLETYPE)(temp / SCALE);
        i++;
-        iSlopeCount += uRate;
+        iSlopeCount += iRate;
    }
    // now always (iSlopeCount > SCALE)
    iSlopeCount -= SCALE;
@ -422,18 +421,18 @@ uint RateTransposerInteger::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *sr
        {
            iSlopeCount -= SCALE;
            used ++;
-            if (used >= numSamples - 1) goto end;
+            if (used >= nSamples - 1) goto end;
        }
        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
        temp = src[used] * vol1 + iSlopeCount * src[used + 1];
        dest[i] = (SAMPLETYPE)(temp / SCALE);

        i++;
-        iSlopeCount += uRate;
+        iSlopeCount += iRate;
    }
 end:
    // Store the last sample for the next round
-    sPrevSampleL = src[numSamples - 1];
+    sPrevSampleL = src[nSamples - 1];

    return i;
 }
@ -442,12 +441,12 @@ end:
 // Transposes the sample rate of the given samples using linear interpolation. 
 // 'Stereo' version of the routine. Returns the number of samples returned in 
 // the "dest" buffer
-uint RateTransposerInteger::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples)
+uint RateTransposerInteger::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
 {
    unsigned int srcPos, i, used;
    LONG_SAMPLETYPE temp, vol1;

-    if (numSamples == 0) return 0;  // no samples, no work
+    if (nSamples == 0) return 0;  // no samples, no work

    used = 0;    
    i = 0;
@ -461,7 +460,7 @@ uint RateTransposerInteger::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *
        temp = vol1 * sPrevSampleR + iSlopeCount * src[1];
        dest[2 * i + 1] = (SAMPLETYPE)(temp / SCALE);
        i++;
-        iSlopeCount += uRate;
+        iSlopeCount += iRate;
    }
    // now always (iSlopeCount > SCALE)
    iSlopeCount -= SCALE;
@ -472,7 +471,7 @@ uint RateTransposerInteger::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *
        {
            iSlopeCount -= SCALE;
            used ++;
-            if (used >= numSamples - 1) goto end;
+            if (used >= nSamples - 1) goto end;
        }
        srcPos = 2 * used;
        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
@ -482,22 +481,22 @@ uint RateTransposerInteger::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *
        dest[2 * i + 1] = (SAMPLETYPE)(temp / SCALE);

        i++;
-        iSlopeCount += uRate;
+        iSlopeCount += iRate;
    }
 end:
    // Store the last sample for the next round
-    sPrevSampleL = src[2 * numSamples - 2];
-    sPrevSampleR = src[2 * numSamples - 1];
+    sPrevSampleL = src[2 * nSamples - 2];
+    sPrevSampleR = src[2 * nSamples - 1];

    return i;
 }


-// Sets new target uRate. Normal uRate = 1.0, smaller values represent slower 
-// uRate, larger faster uRates.
+// Sets new target rate. Normal rate = 1.0, smaller values represent slower 
+// rate, larger values faster rates.
 void RateTransposerInteger::setRate(float newRate)
 {
-    uRate = (int)(newRate * SCALE + 0.5f);
+    iRate = (int)(newRate * SCALE + 0.5f);
    RateTransposer::setRate(newRate);
 }

@ -511,11 +510,10 @@ void RateTransposerInteger::setRate(float newRate)
 // Constructor
 RateTransposerFloat::RateTransposerFloat() : RateTransposer()
 {
-    // call these here as these are virtual functions; calling these
-    // from the base class constructor wouldn't execute the overloaded
-    // versions (<master yoda>peculiar C++ can be</my>).
-    resetRegisters();
-    setRate(1.0f);
+    // Notice: use explicitly qualified function calls for sake of clarity, 
+    // to indicate that a C++ constructor never dispatches virtual calls to derived-class overrides.
+    RateTransposerFloat::resetRegisters();
+    RateTransposerFloat::setRate(1.0f);
 }


@ -536,7 +534,7 @@ void RateTransposerFloat::resetRegisters()
 // Transposes the sample rate of the given samples using linear interpolation. 
 // 'Mono' version of the routine. Returns the number of samples returned in 
 // the "dest" buffer
-uint RateTransposerFloat::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples)
+uint RateTransposerFloat::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
 {
    unsigned int i, used;

@ -552,23 +550,24 @@ uint RateTransposerFloat::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src,
    }
    fSlopeCount -= 1.0f;

-    if (numSamples == 1) goto end;
-
-    while (1)
+    if (nSamples > 1)
    {
-        while (fSlopeCount > 1.0f) 
+        while (1)
        {
-            fSlopeCount -= 1.0f;
-            used ++;
-            if (used >= numSamples - 1) goto end;
+            while (fSlopeCount > 1.0f) 
+            {
+                fSlopeCount -= 1.0f;
+                used ++;
+                if (used >= nSamples - 1) goto end;
+            }
+            dest[i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[used] + fSlopeCount * src[used + 1]);
+            i++;
+            fSlopeCount += fRate;
        }
-        dest[i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[used] + fSlopeCount * src[used + 1]);
-        i++;
-        fSlopeCount += fRate;
    }
 end:
    // Store the last sample for the next round
-    sPrevSampleL = src[numSamples - 1];
+    sPrevSampleL = src[nSamples - 1];

    return i;
 }
@ -577,11 +576,11 @@ end:
 // Transposes the sample rate of the given samples using linear interpolation. 
 // 'Stereo' version of the routine. Returns the number of samples returned in 
 // the "dest" buffer
-uint RateTransposerFloat::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples)
+uint RateTransposerFloat::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
 {
    unsigned int srcPos, i, used;

-    if (numSamples == 0) return 0;  // no samples, no work
+    if (nSamples == 0) return 0;  // no samples, no work

    used = 0;    
    i = 0;
@ -597,30 +596,31 @@ uint RateTransposerFloat::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *sr
    // now always (fSlopeCount > 1.0f)
    fSlopeCount -= 1.0f;

-    if (numSamples == 1) goto end;
-
-    while (1)
+    if (nSamples > 1)
    {
-        while (fSlopeCount > 1.0f) 
+        while (1)
        {
-            fSlopeCount -= 1.0f;
-            used ++;
-            if (used >= numSamples - 1) goto end;
+            while (fSlopeCount > 1.0f) 
+            {
+                fSlopeCount -= 1.0f;
+                used ++;
+                if (used >= nSamples - 1) goto end;
+            }
+            srcPos = 2 * used;
+
+            dest[2 * i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos] 
+                + fSlopeCount * src[srcPos + 2]);
+            dest[2 * i + 1] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos + 1] 
+                + fSlopeCount * src[srcPos + 3]);
+
+            i++;
+            fSlopeCount += fRate;
        }
-        srcPos = 2 * used;
-
-        dest[2 * i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos] 
-            + fSlopeCount * src[srcPos + 2]);
-        dest[2 * i + 1] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos + 1] 
-            + fSlopeCount * src[srcPos + 3]);
-
-        i++;
-        fSlopeCount += fRate;
    }
 end:
    // Store the last sample for the next round
-    sPrevSampleL = src[2 * numSamples - 2];
-    sPrevSampleR = src[2 * numSamples - 1];
+    sPrevSampleL = src[2 * nSamples - 2];
+    sPrevSampleR = src[2 * nSamples - 1];

    return i;
 }
--- a/desmume/src/metaspu/SoundTouch/RateTransposer.h
+++ b/desmume/src/metaspu/SoundTouch/RateTransposer.h
@ -14,10 +14,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.10 $
+// Last changed  : $Date: 2009-02-21 13:00:14 -0300 (sáb, 21 fev 2009) $
+// File revision : $Revision: 4 $
 //
-// $Id: RateTransposer.h,v 1.10 2006/02/05 16:44:06 Olli Exp $
+// $Id: RateTransposer.h 63 2009-02-21 16:00:14Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -45,6 +45,7 @@
 #ifndef RateTransposer_H
 #define RateTransposer_H

+#include <stddef.h>
 #include "AAFilter.h"
 #include "FIFOSamplePipe.h"
 #include "FIFOSampleBuffer.h"
@ -68,7 +69,7 @@ protected:

    float fRate;

-    uint uChannels;
+    int numChannels;

    /// Buffer for collecting samples to feed the anti-alias filter between
    /// two batches
@ -82,8 +83,6 @@ protected:

    BOOL bUseAAFilter;

-    void init();
-
    virtual void resetRegisters() = 0;

    virtual uint transposeStereo(SAMPLETYPE *dest, 
@ -92,12 +91,10 @@ protected:
    virtual uint transposeMono(SAMPLETYPE *dest, 
                       const SAMPLETYPE *src, 
                       uint numSamples) = 0;
-    uint transpose(SAMPLETYPE *dest, 
+    inline uint transpose(SAMPLETYPE *dest, 
                   const SAMPLETYPE *src, 
                   uint numSamples);

-    void flushStoreBuffer();
-
    void downsample(const SAMPLETYPE *src, 
                    uint numSamples);
    void upsample(const SAMPLETYPE *src, 
@ -117,7 +114,7 @@ public:

    /// Operator 'new' is overloaded so that it automatically creates a suitable instance 
    /// depending on if we're to use integer or floating point arithmetics.
-    void *operator new(size_t s);
+    static void *operator new(size_t s);

    /// Use this function instead of "new" operator to create a new instance of this class. 
    /// This function automatically chooses a correct implementation, depending on if 
@ -131,7 +128,7 @@ public:
    FIFOSamplePipe *getStore() { return &storeBuffer; };

    /// Return anti-alias filter object
-    AAFilter *getAAFilter() const;
+    AAFilter *getAAFilter();

    /// Enables/disables the anti-alias filter. Zero to disable, nonzero to enable
    void enableAAFilter(BOOL newMode);
@ -144,7 +141,7 @@ public:
    virtual void setRate(float newRate);

    /// Sets the number of channels, 1 = mono, 2 = stereo
-    void setChannels(uint channels);
+    void setChannels(int channels);

    /// Adds 'numSamples' pcs of samples from the 'samples' memory position into
    /// the input of the object.
@ -154,7 +151,7 @@ public:
    void clear();

    /// Returns nonzero if there aren't any samples available for outputting.
-    uint isEmpty();
+    int isEmpty() const;
 };

 }
--- a/desmume/src/metaspu/SoundTouch/STTypes.h
+++ b/desmume/src/metaspu/SoundTouch/STTypes.h
@ -8,10 +8,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.16 $
+// Last changed  : $Date: 2012-12-28 12:53:56 -0200 (sex, 28 dez 2012) $
+// File revision : $Revision: 3 $
 //
-// $Id: STTypes.h,v 1.16 2006/02/05 16:44:06 Olli Exp $
+// $Id: STTypes.h 162 2012-12-28 14:53:56Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -39,63 +39,25 @@
 #ifndef STTypes_H
 #define STTypes_H

-//#define INTEGER_SAMPLES 1
-
 typedef unsigned int    uint;
 typedef unsigned long   ulong;

-#ifdef __x86_64__
-typedef unsigned long long   ulongptr;
+// Patch for MinGW: on Win64 long is 32-bit
+#ifdef _WIN64
+    typedef unsigned long long ulongptr;
 #else
-typedef unsigned long   ulongptr;
+    typedef ulong ulongptr;
 #endif


-#ifdef __GNUC__
-    // In GCC, include soundtouch_config.h made by config scritps
-/* Define to 1 if you have the <inttypes.h> header file. */
-#define HAVE_INTTYPES_H 1
+// Helper macro for aligning pointer up to next 16-byte boundary
+#define SOUNDTOUCH_ALIGN_POINTER_16(x)      ( ( (ulongptr)(x) + 15 ) & ~(ulongptr)15 )

-/* Define to 1 if you have the `m' library (-lm). */
-#define HAVE_LIBM 1
-
-/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
-   to 0 otherwise. */
-#define HAVE_MALLOC 1
-
-/* Define to 1 if you have the <memory.h> header file. */
-#define HAVE_MEMORY_H 1
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#define HAVE_STDINT_H 1
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H 1
-
-/* Define to 1 if you have the <strings.h> header file. */
-#define HAVE_STRINGS_H 1
-
-/* Define to 1 if you have the <string.h> header file. */
-#define HAVE_STRING_H 1
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#define HAVE_SYS_STAT_H 1
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#define HAVE_SYS_TYPES_H 1
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#define HAVE_UNISTD_H 1
-
-/* Use Integer as Sample type */
-//#define INTEGER_SAMPLES 1
-
-/* Define as the return type of signal handlers (`int' or `void'). */
-#define RETSIGTYPE void
-
-/* Define to 1 if you have the ANSI C header files. */
-#define STDC_HEADERS 1

+#if (defined(__GNUC__) && !defined(ANDROID))
+    // In GCC, include soundtouch_config.h made by config scripts.
+    // Skip this in Android compilation that uses GCC but without configure scripts.
+    #include "soundtouch_config.h"
 #endif

 #ifndef _WINDEF_
@ -103,79 +65,89 @@ typedef unsigned long   ulongptr;

    typedef int BOOL;

-#ifndef FALSE
    #define FALSE   0
-#endif
-
-#ifndef TRUE
    #define TRUE    1
-#endif

 #endif  // _WINDEF_


 namespace soundtouch
 {
-/// Activate these undef's to overrule the possible sampletype 
-/// setting inherited from some other header file:
-//#undef INTEGER_SAMPLES
-//#undef FLOAT_SAMPLES
+    /// Activate these undef's to overrule the possible sampletype 
+    /// setting inherited from some other header file:
+    //#undef SOUNDTOUCH_INTEGER_SAMPLES
+    //#undef SOUNDTOUCH_FLOAT_SAMPLES

-#if !(INTEGER_SAMPLES || FLOAT_SAMPLES)
-   
-    /// Choose either 32bit floating point or 16bit integer sampletype
-    /// by choosing one of the following defines, unless this selection 
-    /// has already been done in some other file.
-    ////
-    /// Notes:
-    /// - In Windows environment, choose the sample format with the
-    ///   following defines.
-    /// - In GNU environment, the floating point samples are used by 
-    ///   default, but integer samples can be chosen by giving the 
-    ///   following switch to the configure script:
-    ///       ./configure --enable-integer-samples
-    ///   However, if you still prefer to select the sample format here 
-    ///   also in GNU environment, then please #undef the INTEGER_SAMPLE
-    ///   and FLOAT_SAMPLE defines first as in comments above.
-    //#define INTEGER_SAMPLES     1    //< 16bit integer samples
-    #define FLOAT_SAMPLES       1    //< 32bit float samples
- 
- #endif
+    #if (defined(__SOFTFP__))
+        // For Android compilation: Force use of Integer samples in case that
+        // compilation uses soft-floating point emulation - soft-fp is way too slow
+        #undef  SOUNDTOUCH_FLOAT_SAMPLES
+        #define SOUNDTOUCH_INTEGER_SAMPLES      1
+    #endif

-    /// Define this to allow CPU-specific assembler optimizations. Notice that 
-    /// having this enabled on non-x86 platforms doesn't matter; the compiler can 
-    /// drop unsupported extensions on different platforms automatically. 
-    /// However, if you're having difficulties getting the optimized routines 
-    /// compiled with your compler (e.g. some gcc compiler versions may be picky), 
-    /// you may wish to disable the optimizations to make the library compile.
-	#if !defined(_MSC_VER) || !defined(__x86_64__)
-	#define ALLOW_OPTIMIZATIONS 1
-	#define ALLOW_NONEXACT_SIMD_OPTIMIZATION    1
-	#endif
+    #if !(SOUNDTOUCH_INTEGER_SAMPLES || SOUNDTOUCH_FLOAT_SAMPLES)
+       
+        /// Choose either 32bit floating point or 16bit integer sampletype
+        /// by choosing one of the following defines, unless this selection 
+        /// has already been done in some other file.
+        ////
+        /// Notes:
+        /// - In Windows environment, choose the sample format with the
+        ///   following defines.
+        /// - In GNU environment, the floating point samples are used by 
+        ///   default, but integer samples can be chosen by giving the 
+        ///   following switch to the configure script:
+        ///       ./configure --enable-integer-samples
+        ///   However, if you still prefer to select the sample format here 
+        ///   also in GNU environment, then please #undef the SOUNDTOUCH_INTEGER_SAMPLES
+        ///   and SOUNDTOUCH_FLOAT_SAMPLES defines first as in comments above.
+        //#define SOUNDTOUCH_INTEGER_SAMPLES     1    //< 16bit integer samples
+        #define SOUNDTOUCH_FLOAT_SAMPLES       1    //< 32bit float samples
+     
+    #endif

+    #if (_M_IX86 || __i386__ || __x86_64__ || _M_X64)
+        /// Define this to allow X86-specific assembler/intrinsic optimizations. 
+        /// Notice that the library also contains plain C++ versions of each of
+        /// these routines, so if you're having difficulties getting the optimized 
+        /// routines compiled for whatever reason, you may disable these optimizations 
+        /// to make the library compile.
+
+        #define SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS     1
+
+        /// In GNU environment, allow the user to override this setting by
+        /// giving the following switch to the configure script:
+        /// ./configure --disable-x86-optimizations
+        /// ./configure --enable-x86-optimizations=no
+        #ifdef SOUNDTOUCH_DISABLE_X86_OPTIMIZATIONS
+            #undef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
+        #endif
+    #else
+        /// Always disable optimizations when not targeting an x86 system.
+        #undef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
+
+    #endif

    // If defined, allows the SIMD-optimized routines to take minor shortcuts 
    // for improved performance. Undefine to require faithfully similar SIMD 
    // calculations as in normal C implementation.
-    
+    #define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION    1


-    #ifdef INTEGER_SAMPLES
+    #ifdef SOUNDTOUCH_INTEGER_SAMPLES
        // 16bit integer sample type
        typedef short SAMPLETYPE;
        // data type for sample accumulation: Use 32bit integer to prevent overflows
        typedef long  LONG_SAMPLETYPE;

-        #ifdef FLOAT_SAMPLES
+        #ifdef SOUNDTOUCH_FLOAT_SAMPLES
            // check that only one sample type is defined
            #error "conflicting sample types defined"
-        #endif // FLOAT_SAMPLES
+        #endif // SOUNDTOUCH_FLOAT_SAMPLES

-        #ifdef ALLOW_OPTIMIZATIONS
-            #if (_WIN32 || __i386__ || __x86_64__)
-                // Allow MMX optimizations
-                #define ALLOW_MMX   1
-            #endif
+        #ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
+            // Allow MMX optimizations
+            #define SOUNDTOUCH_ALLOW_MMX   1
        #endif

    #else
@ -185,17 +157,31 @@ namespace soundtouch
        // data type for sample accumulation: Use double to utilize full precision.
        typedef double LONG_SAMPLETYPE;

-        #ifdef ALLOW_OPTIMIZATIONS
-                // Allow 3DNow! and SSE optimizations
-            #if _WIN32
-               // #define ALLOW_3DNOW     1
-            #endif
-            #if (_WIN32 || __i386__ || __x86_64__)
-                #define ALLOW_SSE       1
-            #endif
+        #ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
+            // Allow SSE optimizations
+            #define SOUNDTOUCH_ALLOW_SSE       1
        #endif

-    #endif  // INTEGER_SAMPLES
+    #endif  // SOUNDTOUCH_INTEGER_SAMPLES
+
 };

-#endif
+// define ST_NO_EXCEPTION_HANDLING switch to disable throwing std exceptions:
+// #define ST_NO_EXCEPTION_HANDLING    1
+#ifdef ST_NO_EXCEPTION_HANDLING
+    // Exceptions disabled. Throw asserts instead if enabled.
+    #include <assert.h>
+    #define ST_THROW_RT_ERROR(x)    {assert((const char *)x);}
+#else
+    // use c++ standard exceptions
+    #include <stdexcept>
+    #define ST_THROW_RT_ERROR(x)    {throw std::runtime_error(x);}
+#endif
+
+// When this #define is active, eliminates a clicking sound when the "rate" or "pitch" 
+// parameter setting crosses from value <1 to >=1 or vice versa during processing. 
+// Default is off as such crossover is untypical case and involves a slight sound 
+// quality compromise.
+//#define SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER   1
+
+#endif
--- a/desmume/src/metaspu/SoundTouch/SoundTouch.cpp
+++ b/desmume/src/metaspu/SoundTouch/SoundTouch.cpp
@ -41,10 +41,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.13 $
+// Last changed  : $Date: 2012-06-13 16:29:53 -0300 (qua, 13 jun 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: SoundTouch.cpp,v 1.13 2006/02/05 16:44:06 Olli Exp $
+// $Id: SoundTouch.cpp 143 2012-06-13 19:29:53Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -73,7 +73,6 @@
 #include <stdlib.h>
 #include <memory.h>
 #include <math.h>
-#include <stdexcept>
 #include <stdio.h>

 #include "SoundTouch.h"
@ -82,8 +81,12 @@
 #include "cpu_detect.h"

 using namespace soundtouch;
+    
+/// test if two floating point numbers are equal
+#define TEST_FLOAT_EQUAL(a, b)  (fabs(a - b) < 1e-10)

-/// Print library version string
+
+/// Print library version string for autoconf
 extern "C" void soundtouch_ac_test()
 {
    printf("SoundTouch Version: %s\n",SOUNDTOUCH_VERSION);
@ -142,11 +145,11 @@ void SoundTouch::setChannels(uint numChannels)
 {
    if (numChannels != 1 && numChannels != 2) 
    {
-        throw std::runtime_error("Illegal number of channels");
+        ST_THROW_RT_ERROR("Illegal number of channels");
    }
    channels = numChannels;
-    pRateTransposer->setChannels(numChannels);
-    pTDStretch->setChannels(numChannels);
+    pRateTransposer->setChannels((int)numChannels);
+    pTDStretch->setChannels((int)numChannels);
 }


@ -236,10 +239,28 @@ void SoundTouch::calcEffectiveRateAndTempo()
    tempo = virtualTempo / virtualPitch;
    rate = virtualPitch * virtualRate;

-    if (rate != oldRate) pRateTransposer->setRate(rate);
-    if (tempo != oldTempo) pTDStretch->setTempo(tempo);
+    if (!TEST_FLOAT_EQUAL(rate,oldRate)) pRateTransposer->setRate(rate);
+    if (!TEST_FLOAT_EQUAL(tempo, oldTempo)) pTDStretch->setTempo(tempo);

-    if (rate > 1.0f) 
+#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
+    if (rate <= 1.0f) 
+    {
+        if (output != pTDStretch) 
+        {
+            FIFOSamplePipe *tempoOut;
+
+            assert(output == pRateTransposer);
+            // move samples in the current output buffer to the output of pTDStretch
+            tempoOut = pTDStretch->getOutput();
+            tempoOut->moveSamples(*output);
+            // move samples in pitch transposer's store buffer to tempo changer's input
+            pTDStretch->moveSamples(*pRateTransposer->getStore());
+
+            output = pTDStretch;
+        }
+    }
+    else
+#endif
    {
        if (output != pRateTransposer) 
        {
@ -255,23 +276,6 @@ void SoundTouch::calcEffectiveRateAndTempo()
            output = pRateTransposer;
        }
    } 
-    else 
-    {
-        if (output != pTDStretch) 
-        {
-            FIFOSamplePipe *tempoOut;
-
-            assert(output == pRateTransposer);
-            // move samples in the current output buffer to the output of pTDStretch
-            tempoOut = pTDStretch->getOutput();
-            tempoOut->moveSamples(*output);
-            // move samples in pitch transposer's store buffer to tempo changer's input
-            pTDStretch->moveSamples(*pRateTransposer->getStore());
-
-            output = pTDStretch;
-
-        }
-    }
 }


@ -280,21 +284,21 @@ void SoundTouch::setSampleRate(uint srate)
 {
    bSrateSet = TRUE;
    // set sample rate, leave other tempo changer parameters as they are.
-    pTDStretch->setParameters(srate);
+    pTDStretch->setParameters((int)srate);
 }


 // Adds 'numSamples' pcs of samples from the 'samples' memory position into
 // the input of the object.
-void SoundTouch::putSamples(const SAMPLETYPE *samples, uint numSamples)
+void SoundTouch::putSamples(const SAMPLETYPE *samples, uint nSamples)
 {
    if (bSrateSet == FALSE) 
    {
-        throw std::runtime_error("SoundTouch : Sample rate not defined");
+        ST_THROW_RT_ERROR("SoundTouch : Sample rate not defined");
    } 
    else if (channels == 0) 
    {
-        throw std::runtime_error("SoundTouch : Number of channels not defined");
+        ST_THROW_RT_ERROR("SoundTouch : Number of channels not defined");
    }

    // Transpose the rate of the new samples if necessary
@ -309,22 +313,23 @@ void SoundTouch::putSamples(const SAMPLETYPE *samples, uint numSamples)
            // (may happen if 'rate' changes from a non-zero value to zero)
            pTDStretch->moveSamples(*pRateTransposer);
        }
-        pTDStretch->putSamples(samples, numSamples);
+        pTDStretch->putSamples(samples, nSamples);
    } 
    */
+#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
    else if (rate <= 1.0f) 
    {
        // transpose the rate down, output the transposed sound to tempo changer buffer
        assert(output == pTDStretch);
-        pRateTransposer->putSamples(samples, numSamples);
+        pRateTransposer->putSamples(samples, nSamples);
        pTDStretch->moveSamples(*pRateTransposer);
    } 
    else 
+#endif
    {
-        assert(rate > 1.0f);
        // evaluate the tempo changer, then transpose the rate up, 
        assert(output == pRateTransposer);
-        pTDStretch->putSamples(samples, numSamples);
+        pTDStretch->putSamples(samples, nSamples);
        pRateTransposer->moveSamples(*pTDStretch);
    }
 }
@ -340,12 +345,19 @@ void SoundTouch::putSamples(const SAMPLETYPE *samples, uint numSamples)
 void SoundTouch::flush()
 {
    int i;
-    uint nOut;
-    SAMPLETYPE buff[128];
+    int nUnprocessed;
+    int nOut;
+    SAMPLETYPE buff[64*2];   // note: allocate 2*64 to cater 64 sample frames of stereo sound

-    nOut = numSamples();
+    // check how many samples still await processing, and scale
+    // that by tempo & rate to get expected output sample count
+    nUnprocessed = numUnprocessedSamples();
+    nUnprocessed = (int)((double)nUnprocessed / (tempo * rate) + 0.5);

-    memset(buff, 0, 128 * sizeof(SAMPLETYPE));
+    nOut = numSamples();        // ready samples currently in buffer ...
+    nOut += nUnprocessed;       // ... and how many we expect there to be in the end
+    
+    memset(buff, 0, 64 * channels * sizeof(SAMPLETYPE));
    // "Push" the last active samples out from the processing pipeline by
    // feeding blank samples into the processing pipeline until new, 
    // processed samples appear in the output (not however, more than 
@ -353,7 +365,16 @@ void SoundTouch::flush()
    for (i = 0; i < 128; i ++) 
    {
        putSamples(buff, 64);
-        if (numSamples() != nOut) break;  // new samples have appeared in the output!
+        if ((int)numSamples() >= nOut) 
+        {
+            // Enough new samples have appeared into the output!
+            // As samples come from processing with bigger chunks, now truncate it
+            // back to maximum "nOut" samples to improve duration accuracy 
+            adjustAmountOfSamples(nOut);
+
+            // finish
+            break;  
+        }
    }

    // Clear working buffers
@ -366,9 +387,9 @@ void SoundTouch::flush()

 // Changes a setting controlling the processing system behaviour. See the
 // 'SETTING_...' defines for available setting ID's.
-BOOL SoundTouch::setSetting(uint settingId, uint value)
+BOOL SoundTouch::setSetting(int settingId, int value)
 {
-    uint sampleRate, sequenceMs, seekWindowMs, overlapMs;
+    int sampleRate, sequenceMs, seekWindowMs, overlapMs;

    // read current tdstretch routine parameters
    pTDStretch->getParameters(&sampleRate, &sequenceMs, &seekWindowMs, &overlapMs);
@ -415,20 +436,20 @@ BOOL SoundTouch::setSetting(uint settingId, uint value)
 // 'SETTING_...' defines for available setting ID's.
 //
 // Returns the setting value.
-uint SoundTouch::getSetting(uint settingId) const
+int SoundTouch::getSetting(int settingId) const
 {
-    uint temp;
+    int temp;

    switch (settingId) 
    {
        case SETTING_USE_AA_FILTER :
-            return pRateTransposer->isAAFilterEnabled();
+            return (uint)pRateTransposer->isAAFilterEnabled();

        case SETTING_AA_FILTER_LENGTH :
            return pRateTransposer->getAAFilter()->getLength();

        case SETTING_USE_QUICKSEEK :
-            return pTDStretch->isQuickSeekEnabled();
+            return (uint)   pTDStretch->isQuickSeekEnabled();

        case SETTING_SEQUENCE_MS:
            pTDStretch->getParameters(NULL, &temp, NULL, NULL);
@ -442,7 +463,13 @@ uint SoundTouch::getSetting(uint settingId) const
            pTDStretch->getParameters(NULL, NULL, NULL, &temp);
            return temp;

-        default :
+		case SETTING_NOMINAL_INPUT_SEQUENCE :
+			return pTDStretch->getInputSampleReq();
+
+		case SETTING_NOMINAL_OUTPUT_SEQUENCE :
+			return pTDStretch->getOutputBatchSize();
+
+		default :
            return 0;
    }
 }
--- a/desmume/src/metaspu/SoundTouch/SoundTouch.h
+++ b/desmume/src/metaspu/SoundTouch/SoundTouch.h
@ -41,10 +41,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.14 $
+// Last changed  : $Date: 2012-12-28 17:32:59 -0200 (sex, 28 dez 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: SoundTouch.h,v 1.14 2006/02/05 16:44:06 Olli Exp $
+// $Id: SoundTouch.h 163 2012-12-28 19:32:59Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -79,10 +79,10 @@ namespace soundtouch
 {

 /// Soundtouch library version string
-#define SOUNDTOUCH_VERSION          "1.3.1"
+#define SOUNDTOUCH_VERSION          "1.7.1"

 /// SoundTouch library version id
-#define SOUNDTOUCH_VERSION_ID       010301
+#define SOUNDTOUCH_VERSION_ID       (10701)

 //
 // Available setting IDs for the 'setSetting' & 'get_setting' functions:
@ -116,6 +116,31 @@ namespace soundtouch
 #define SETTING_OVERLAP_MS          5


+/// Call "getSetting" with this ID to query the nominal average processing sequence
+/// size in samples. This value tells approximately how many input samples
+/// SoundTouch needs to gather before it does a DSP processing run for the sample batch.
+///
+/// Notes:
+/// - This is a read-only parameter, i.e. setSetting ignores this parameter
+/// - The returned value is an approximate average value; the exact processing batch
+///   size may vary from time to time
+/// - This parameter value is not constant but may change depending on
+///   tempo/pitch/rate/samplerate settings.
+#define SETTING_NOMINAL_INPUT_SEQUENCE		6
+
+
+/// Call "getSetting" with this ID to query the nominal average processing output
+/// size in samples. This value tells approximately how many output samples
+/// SoundTouch outputs once it does a DSP processing run for a batch of input samples.
+///
+/// Notes:
+/// - This is a read-only parameter, i.e. setSetting ignores this parameter
+/// - The returned value is an approximate average value; the exact processing batch
+///   size may vary from time to time
+/// - This parameter value is not constant but may change depending on
+///   tempo/pitch/rate/samplerate settings.
+#define SETTING_NOMINAL_OUTPUT_SEQUENCE		7
+
 class SoundTouch : public FIFOProcessor
 {
 private:
@ -223,16 +248,16 @@ public:
    /// 'SETTING_...' defines for available setting ID's.
    /// 
    /// \return 'TRUE' if the setting was successfully changed
-    BOOL setSetting(uint settingId,   ///< Setting ID number. see SETTING_... defines.
-                    uint value        ///< New setting value.
+    BOOL setSetting(int settingId,   ///< Setting ID number. see SETTING_... defines.
+                    int value        ///< New setting value.
                    );

    /// Reads a setting controlling the processing system behaviour. See the
    /// 'SETTING_...' defines for available setting ID's.
    ///
    /// \return the setting value.
-    uint getSetting(uint settingId    ///< Setting ID number, see SETTING_... defines.
-                    ) const;
+    int getSetting(int settingId    ///< Setting ID number, see SETTING_... defines.
+                   ) const;

    /// Returns number of samples currently unprocessed.
    virtual uint numUnprocessedSamples() const;
--- a/desmume/src/metaspu/SoundTouch/TDStretch.cpp
+++ b/desmume/src/metaspu/SoundTouch/TDStretch.cpp
--- a/desmume/src/metaspu/SoundTouch/TDStretch.h
+++ b/desmume/src/metaspu/SoundTouch/TDStretch.h
@ -4,8 +4,8 @@
 /// while maintaining the original pitch by using a time domain WSOLA-like method 
 /// with several performance-increasing tweaks.
 ///
-/// Note : MMX optimized functions reside in a separate, platform-specific file, 
-/// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
+/// Note : MMX/SSE optimized functions reside in separate, platform-specific files 
+/// 'mmx_optimized.cpp' and 'sse_optimized.cpp'
 ///
 /// Author        : Copyright (c) Olli Parviainen
 /// Author e-mail : oparviai 'at' iki.fi
@ -13,10 +13,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.16 $
+// Last changed  : $Date: 2012-04-01 16:49:30 -0300 (dom, 01 abr 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: TDStretch.h,v 1.16 2006/02/05 16:44:06 Olli Exp $
+// $Id: TDStretch.h 137 2012-04-01 19:49:30Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -44,6 +44,7 @@
 #ifndef TDStretch_H
 #define TDStretch_H

+#include <stddef.h>
 #include "STTypes.h"
 #include "RateTransposer.h"
 #include "FIFOSamplePipe.h"
@ -51,7 +52,13 @@
 namespace soundtouch
 {

-// Default values for sound processing parameters:
+/// Default values for sound processing parameters:
+/// Notice that the default parameters are tuned for contemporary popular music 
+/// processing. For speech processing applications these parameters suit better:
+///     #define DEFAULT_SEQUENCE_MS     40
+///     #define DEFAULT_SEEKWINDOW_MS   15
+///     #define DEFAULT_OVERLAP_MS      8
+///

 /// Default length of a single processing sequence, in milliseconds. This determines to how 
 /// long sequences the original sound is chopped in the time-stretch algorithm.
@ -61,11 +68,41 @@ namespace soundtouch
 /// and vice versa.
 ///
 /// Increasing this value reduces computational burden & vice versa.
-#define DEFAULT_SEQUENCE_MS     63
+//#define DEFAULT_SEQUENCE_MS         40
+#define DEFAULT_SEQUENCE_MS         USE_AUTO_SEQUENCE_LEN

-#define DEFAULT_SEEKWINDOW_MS   17
+/// Giving this value for the sequence length sets automatic parameter value
+/// according to tempo setting (recommended)
+#define USE_AUTO_SEQUENCE_LEN       0

-#define DEFAULT_OVERLAP_MS      7
+/// Seeking window default length in milliseconds for algorithm that finds the best possible 
+/// overlapping location. This determines from how wide window the algorithm may look for an 
+/// optimal joining location when mixing the sound sequences back together. 
+///
+/// The bigger this window setting is, the more likely the algorithm is to find a better
+/// mixing position, but at the same time large values may cause a "drifting" artifact
+/// because consecutive sequences will be taken at more uneven intervals.
+///
+/// If there's a disturbing artifact that sounds as if a constant frequency was drifting 
+/// around, try reducing this setting.
+///
+/// Increasing this value increases computational burden & vice versa.
+//#define DEFAULT_SEEKWINDOW_MS       15
+#define DEFAULT_SEEKWINDOW_MS       USE_AUTO_SEEKWINDOW_LEN
+
+/// Giving this value for the seek window length sets automatic parameter value
+/// according to tempo setting (recommended)
+#define USE_AUTO_SEEKWINDOW_LEN     0
+
+/// Overlap length in milliseconds. When the chopped sound sequences are mixed back together, 
+/// to form a continuous sound stream, this parameter defines over how long period the two 
+/// consecutive sequences are let to overlap each other. 
+///
+/// This is not a very critical parameter. If you reduce the DEFAULT_SEQUENCE_MS setting
+/// by a large amount, you might wish to try a smaller value for this one as well.
+///
+/// Increasing this value increases computational burden & vice versa.
+#define DEFAULT_OVERLAP_MS      8


 /// Class that does the time-stretch (tempo change) effect for the processed
@ -73,44 +110,40 @@ namespace soundtouch
 class TDStretch : public FIFOProcessor
 {
 protected:
-    uint channels;
-    uint sampleReq;
+    int channels;
+    int sampleReq;
    float tempo;

    SAMPLETYPE *pMidBuffer;
-    SAMPLETYPE *pRefMidBuffer;
-    SAMPLETYPE *pRefMidBufferUnaligned;
-    uint overlapLength;
-    uint overlapDividerBits;
-    uint slopingDivider;
-    uint seekLength;
-    uint seekWindowLength;
-    uint maxOffset;
+    SAMPLETYPE *pMidBufferUnaligned;
+    int overlapLength;
+    int seekLength;
+    int seekWindowLength;
+    int overlapDividerBits;
+    int slopingDivider;
    float nominalSkip;
    float skipFract;
    FIFOSampleBuffer outputBuffer;
    FIFOSampleBuffer inputBuffer;
-    BOOL bQuickseek;
-    BOOL bMidBufferDirty;
+    BOOL bQuickSeek;

-    uint sampleRate;
-    uint sequenceMs;
-    uint seekWindowMs;
-    uint overlapMs;
+    int sampleRate;
+    int sequenceMs;
+    int seekWindowMs;
+    int overlapMs;
+    BOOL bAutoSeqSetting;
+    BOOL bAutoSeekSetting;

-    void acceptNewOverlapLength(uint newOverlapLength);
+    void acceptNewOverlapLength(int newOverlapLength);

    virtual void clearCrossCorrState();
-    void calculateOverlapLength(uint overlapMs);
+    void calculateOverlapLength(int overlapMs);

-    virtual LONG_SAMPLETYPE calcCrossCorrStereo(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
-    virtual LONG_SAMPLETYPE calcCrossCorrMono(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
+    virtual double calcCrossCorr(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;

-    virtual uint seekBestOverlapPositionStereo(const SAMPLETYPE *refPos);
-    virtual uint seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos);
-    virtual uint seekBestOverlapPositionMono(const SAMPLETYPE *refPos);
-    virtual uint seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos);
-    uint seekBestOverlapPosition(const SAMPLETYPE *refPos);
+    virtual int seekBestOverlapPositionFull(const SAMPLETYPE *refPos);
+    virtual int seekBestOverlapPositionQuick(const SAMPLETYPE *refPos);
+    int seekBestOverlapPosition(const SAMPLETYPE *refPos);

    virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const;
    virtual void overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const;
@ -118,10 +151,7 @@ protected:
    void clearMidBuffer();
    void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const;

-    void precalcCorrReferenceMono();
-    void precalcCorrReferenceStereo();
-
-    void processNominalTempo();
+    void calcSeqParameters();

    /// Changes the tempo of the given sound samples.
    /// Returns amount of samples returned in the "output" buffer.
@ -135,7 +165,7 @@ public:

    /// Operator 'new' is overloaded so that it automatically creates a suitable instance 
    /// depending on if we've a MMX/SSE/etc-capable CPU available or not.
-    void *operator new(size_t s);
+    static void *operator new(size_t s);

    /// Use this function instead of "new" operator to create a new instance of this class. 
    /// This function automatically chooses a correct feature set depending on if the CPU
@ -159,7 +189,7 @@ public:
    void clearInput();

    /// Sets the number of channels, 1 = mono, 2 = stereo
-    void setChannels(uint numChannels);
+    void setChannels(int numChannels);

    /// Enables/disables the quick position seeking algorithm. Zero to disable, 
    /// nonzero to enable
@ -176,16 +206,16 @@ public:
    /// 'seekwindowMS' = seeking window length for scanning the best overlapping 
    ///      position
    /// 'overlapMS' = overlapping length
-    void setParameters(uint sampleRate,                             ///< Samplerate of sound being processed (Hz)
-                       uint sequenceMS = DEFAULT_SEQUENCE_MS,       ///< Single processing sequence length (ms)
-                       uint seekwindowMS = DEFAULT_SEEKWINDOW_MS,   ///< Offset seeking window length (ms)
-                       uint overlapMS = DEFAULT_OVERLAP_MS          ///< Sequence overlapping length (ms)
+    void setParameters(int sampleRate,          ///< Samplerate of sound being processed (Hz)
+                       int sequenceMS = -1,     ///< Single processing sequence length (ms)
+                       int seekwindowMS = -1,   ///< Offset seeking window length (ms)
+                       int overlapMS = -1       ///< Sequence overlapping length (ms)
                       );

    /// Get routine control parameters, see setParameters() function.
    /// Any of the parameters to this function can be NULL, in such case corresponding parameter
    /// value isn't returned.
-    void getParameters(uint *pSampleRate, uint *pSequenceMs, uint *pSeekWindowMs, uint *pOverlapMs);
+    void getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const;

    /// Adds 'numsamples' pcs of samples from the 'samples' memory position into
    /// the input of the object.
@ -194,43 +224,45 @@ public:
            uint numSamples                         ///< Number of samples in 'samples' so that one sample
                                                    ///< contains both channels if stereo
            );
+
+    /// return nominal input sample requirement for triggering a processing batch
+    int getInputSampleReq() const
+    {
+        return (int)(nominalSkip + 0.5);
+    }
+
+    /// return nominal output sample amount when running a processing batch
+    int getOutputBatchSize() const
+    {
+        return seekWindowLength - overlapLength;
+    }
 };



 // Implementation-specific class declarations:

-//#ifdef ALLOW_MMX
-//    /// Class that implements MMX optimized routines for 16bit integer samples type.
-//    class TDStretchMMX : public TDStretch
-//    {
-//    protected:
-//        long calcCrossCorrStereo(const short *mixingPos, const short *compare) const;
-//        virtual void overlapStereo(short *output, const short *input) const;
-//        virtual void clearCrossCorrState();
-//    };
-//#endif /// ALLOW_MMX
-//
-//
-//#ifdef ALLOW_3DNOW
-//    /// Class that implements 3DNow! optimized routines for floating point samples type.
-//    class TDStretch3DNow : public TDStretch
-//    {
-//    protected:
-//        double calcCrossCorrStereo(const float *mixingPos, const float *compare) const;
-//    };
-//#endif /// ALLOW_3DNOW
+#ifdef SOUNDTOUCH_ALLOW_MMX
+    /// Class that implements MMX optimized routines for 16bit integer samples type.
+    class TDStretchMMX : public TDStretch
+    {
+    protected:
+        double calcCrossCorr(const short *mixingPos, const short *compare) const;
+        virtual void overlapStereo(short *output, const short *input) const;
+        virtual void clearCrossCorrState();
+    };
+#endif /// SOUNDTOUCH_ALLOW_MMX


-#ifdef ALLOW_SSE
+#ifdef SOUNDTOUCH_ALLOW_SSE
    /// Class that implements SSE optimized routines for floating point samples type.
    class TDStretchSSE : public TDStretch
    {
    protected:
-        double calcCrossCorrStereo(const float *mixingPos, const float *compare) const;
+        double calcCrossCorr(const float *mixingPos, const float *compare) const;
    };

-#endif /// ALLOW_SSE
+#endif /// SOUNDTOUCH_ALLOW_SSE

 }
 #endif  /// TDStretch_H
--- a/desmume/src/metaspu/SoundTouch/WavFile.cpp
+++ b/desmume/src/metaspu/SoundTouch/WavFile.cpp
@ -1,4 +1,4 @@
-////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////
 ///
 /// Classes for easy reading & writing of WAV sound files. 
 ///
@ -17,10 +17,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.15 $
+// Last changed  : $Date: 2012-09-01 05:03:26 -0300 (sáb, 01 set 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: WavFile.cpp,v 1.15 2006/02/05 16:44:06 Olli Exp $
+// $Id: WavFile.cpp 154 2012-09-01 08:03:26Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -46,22 +46,21 @@
 ////////////////////////////////////////////////////////////////////////////////

 #include <stdio.h>
-#include <stdexcept>
 #include <string>
+#include <sstream>
+#include <cstring>
 #include <assert.h>
 #include <limits.h>

-#include <cstdlib>
-#include <cstring>
-
 #include "WavFile.h"
+#include "STTypes.h"

 using namespace std;

-const static char riffStr[] = "RIFF";
-const static char waveStr[] = "WAVE";
-const static char fmtStr[]  = "fmt ";
-const static char dataStr[] = "data";
+static const char riffStr[] = "RIFF";
+static const char waveStr[] = "WAVE";
+static const char fmtStr[]  = "fmt ";
+static const char dataStr[] = "data";


 //////////////////////////////////////////////////////////////////////////////
@ -85,29 +84,31 @@ const static char dataStr[] = "data";
    // big-endian CPU, swap bytes in 16 & 32 bit words

    // helper-function to swap byte-order of 32bit integer
-    static inline void _swap32(unsigned int &dwData)
+    static inline int _swap32(int &dwData)
    {
        dwData = ((dwData >> 24) & 0x000000FF) | 
-                 ((dwData >> 8)  & 0x0000FF00) | 
-                 ((dwData << 8)  & 0x00FF0000) | 
-                 ((dwData << 24) & 0xFF000000);
+               ((dwData >> 8)  & 0x0000FF00) | 
+               ((dwData << 8)  & 0x00FF0000) | 
+               ((dwData << 24) & 0xFF000000);
+        return dwData;
    }   

    // helper-function to swap byte-order of 16bit integer
-    static inline void _swap16(unsigned short &wData)
+    static inline short _swap16(short &wData)
    {
        wData = ((wData >> 8) & 0x00FF) | 
                ((wData << 8) & 0xFF00);
+        return wData;
    }

    // helper-function to swap byte-order of buffer of 16bit integers
-    static inline void _swap16Buffer(unsigned short *pData, unsigned int dwNumWords)
+    static inline void _swap16Buffer(short *pData, int numWords)
    {
-        unsigned long i;
+        int i;

-        for (i = 0; i < dwNumWords; i ++)
+        for (i = 0; i < numWords; i ++)
        {
-            _swap16(pData[i]);
+            pData[i] = _swap16(pData[i]);
        }
    }

@ -115,19 +116,21 @@ const static char dataStr[] = "data";
    // little-endian CPU, WAV file is ok as such

    // dummy helper-function
-    static inline void _swap32(unsigned int &dwData)
+    static inline int _swap32(int &dwData)
    {
        // do nothing
+        return dwData;
    }   

    // dummy helper-function
-    static inline void _swap16(unsigned short &wData)
+    static inline short _swap16(short &wData)
    {
        // do nothing
+        return wData;
    }

    // dummy helper-function
-    static inline void _swap16Buffer(unsigned short *pData, unsigned int dwNumBytes)
+    static inline void _swap16Buffer(short *pData, int numBytes)
    {
        // do nothing
    }
@ -135,6 +138,39 @@ const static char dataStr[] = "data";
 #endif  // BIG_ENDIAN


+//////////////////////////////////////////////////////////////////////////////
+//
+// Class WavFileBase
+//
+
+WavFileBase::WavFileBase()
+{
+    convBuff = NULL;
+    convBuffSize = 0;
+}
+
+
+WavFileBase::~WavFileBase()
+{
+    delete[] convBuff;
+    convBuffSize = 0;
+}
+
+
+/// Get a pointer to a conversion buffer of at least the given size
+void *WavFileBase::getConvBuffer(int sizeBytes)
+{
+    if (convBuffSize < sizeBytes)
+    {
+        delete[] convBuff;
+
+        convBuffSize = (sizeBytes + 15) & -8;   // round up to following 8-byte bounday
+        convBuff = new char[convBuffSize];
+    }
+    return convBuff;
+}
+
+
 //////////////////////////////////////////////////////////////////////////////
 //
 // Class WavInFile
@ -142,8 +178,6 @@ const static char dataStr[] = "data";

 WavInFile::WavInFile(const char *fileName)
 {
-    int hdrsOk;
-
    // Try to open the file for reading
    fptr = fopen(fileName, "rb");
    if (fptr == NULL) 
@ -152,27 +186,52 @@ WavInFile::WavInFile(const char *fileName)
        string msg = "Error : Unable to open file \"";
        msg += fileName;
        msg += "\" for reading.";
-        throw runtime_error(msg);
+        ST_THROW_RT_ERROR(msg.c_str());
    }

+    init();
+}
+
+
+WavInFile::WavInFile(FILE *file)
+{
+    // Adopt the caller-supplied stream; it must already be open for reading
+    fptr = file;
+    if (!file) 
+    {
+        // didn't succeed
+        string msg = "Error : Unable to access input stream for reading";
+        ST_THROW_RT_ERROR(msg.c_str());
+    }
+
+    init();
+}
+
+
+/// Init the WAV file stream
+void WavInFile::init()
+{
+    int hdrsOk;
+
+    // assume file stream is already open
+    assert(fptr);
+
    // Read the file headers
    hdrsOk = readWavHeaders();
    if (hdrsOk != 0) 
    {
        // Something didn't match in the wav file headers 
-        string msg = "File \"";
-        msg += fileName;
-        msg += "\" is corrupt or not a WAV file";
-        throw runtime_error(msg);
+        string msg = "Input file is corrupt or not a WAV file";
+        ST_THROW_RT_ERROR(msg.c_str());
    }

+    /* Ignore 'fixed' field value as 32bit signed linear data can have other value than 1.
    if (header.format.fixed != 1)
    {
-        string msg = "File \"";
-        msg += fileName;
-        msg += "\" uses unsupported encoding.";
-        throw runtime_error(msg);
+        string msg = "Input file uses unsupported encoding.";
+        ST_THROW_RT_ERROR(msg.c_str());
    }
+    */

    dataRead = 0;
 }
@ -181,7 +240,8 @@ WavInFile::WavInFile(const char *fileName)

 WavInFile::~WavInFile()
 {
-    close();
+    if (fptr) fclose(fptr);
+    fptr = NULL;
 }


@ -197,7 +257,7 @@ void WavInFile::rewind()
 }


-int WavInFile::checkCharTags()
+int WavInFile::checkCharTags() const
 {
    // header.format.fmt should equal to 'fmt '
    if (memcmp(fmtStr, header.format.fmt, 4) != 0) return -1;
@ -208,7 +268,7 @@ int WavInFile::checkCharTags()
 }


-int WavInFile::read(char *buffer, int maxElems)
+int WavInFile::read(unsigned char *buffer, int maxElems)
 {
    int numBytes;
    uint afterDataRead;
@ -216,7 +276,7 @@ int WavInFile::read(char *buffer, int maxElems)
    // ensure it's 8 bit format
    if (header.format.bits_per_sample != 8)
    {
-        throw runtime_error("Error: WavInFile::read(char*, int) works only with 8bit samples.");
+        ST_THROW_RT_ERROR("Error: WavInFile::read(char*, int) works only with 8bit samples.");
    }
    assert(sizeof(char) == 1);

@ -225,11 +285,12 @@ int WavInFile::read(char *buffer, int maxElems)
    if (afterDataRead > header.data.data_len) 
    {
        // Don't read more samples than are marked available in header
-        numBytes = header.data.data_len - dataRead;
+        numBytes = (int)header.data.data_len - (int)dataRead;
        assert(numBytes >= 0);
    }

-    numBytes = fread(buffer, 1, numBytes, fptr);
+    assert(buffer);
+    numBytes = (int)fread(buffer, 1, numBytes, fptr);
    dataRead += numBytes;

    return numBytes;
@ -242,67 +303,155 @@ int WavInFile::read(short *buffer, int maxElems)
    int numBytes;
    int numElems;

-    if (header.format.bits_per_sample == 8)
+    assert(buffer);
+    switch (header.format.bits_per_sample)
    {
-        // 8 bit format
-        char *temp = new char[maxElems];
-        int i;
-
-        numElems = read(temp, maxElems);
-        // convert from 8 to 16 bit
-        for (i = 0; i < numElems; i ++)
+        case 8:
        {
-            buffer[i] = temp[i] << 8;
-        }
-        delete[] temp;
-    }
-    else
-    {
-        // 16 bit format
-        assert(header.format.bits_per_sample == 16);
-        assert(sizeof(short) == 2);
+            // 8 bit format
+            unsigned char *temp = (unsigned char*)getConvBuffer(maxElems);
+            int i;

-        numBytes = maxElems * 2;
-        afterDataRead = dataRead + numBytes;
-        if (afterDataRead > header.data.data_len) 
-        {
-            // Don't read more samples than are marked available in header
-            numBytes = header.data.data_len - dataRead;
-            assert(numBytes >= 0);
+            numElems = read(temp, maxElems);
+            // convert from 8 to 16 bit
+            for (i = 0; i < numElems; i ++)
+            {
+                buffer[i] = (short)(((short)temp[i] - 128) * 256);
+            }
+            break;
        }

-        numBytes = fread(buffer, 1, numBytes, fptr);
-        dataRead += numBytes;
-        numElems = numBytes / 2;
+        case 16:
+        {
+            // 16 bit format

-        // 16bit samples, swap byte order if necessary
-        _swap16Buffer((unsigned short *)buffer, numElems);
-    }
+            assert(sizeof(short) == 2);
+
+            numBytes = maxElems * 2;
+            afterDataRead = dataRead + numBytes;
+            if (afterDataRead > header.data.data_len) 
+            {
+                // Don't read more samples than are marked available in header
+                numBytes = (int)header.data.data_len - (int)dataRead;
+                assert(numBytes >= 0);
+            }
+
+            numBytes = (int)fread(buffer, 1, numBytes, fptr);
+            dataRead += numBytes;
+            numElems = numBytes / 2;
+
+            // 16bit samples, swap byte order if necessary
+            _swap16Buffer((short *)buffer, numElems);
+            break;
+        }
+
+        default:
+        {
+            stringstream ss;
+            ss << "\nOnly 8/16 bit sample WAV files supported in integer compilation. Can't open WAV file with ";
+            ss << (int)header.format.bits_per_sample;
+            ss << " bit sample format. ";
+            ST_THROW_RT_ERROR(ss.str().c_str());
+        }
+    };

    return numElems;
 }


-
+/// Read data in float format. Notice that when reading in float format 
+/// 8/16/24/32 bit sample formats are supported
 int WavInFile::read(float *buffer, int maxElems)
 {
-    short *temp = new short[maxElems];
-    int num;
-    int i;
-    double fscale;
+    unsigned int afterDataRead;
+    int numBytes;
+    int numElems;
+    int bytesPerSample;

-    num = read(temp, maxElems);
+    assert(buffer);

-    fscale = 1.0 / 32768.0;
-    // convert to floats, scale to range [-1..+1[
-    for (i = 0; i < num; i ++)
+    bytesPerSample = header.format.bits_per_sample / 8;
+    if ((bytesPerSample < 1) || (bytesPerSample > 4))
    {
-        buffer[i] = (float)(fscale * (double)temp[i]);
+        stringstream ss;
+        ss << "\nOnly 8/16/24/32 bit sample WAV files supported. Can't open WAV file with ";
+        ss << (int)header.format.bits_per_sample;
+        ss << " bit sample format. ";
+        ST_THROW_RT_ERROR(ss.str().c_str());
    }

-    delete[] temp;
+    numBytes = maxElems * bytesPerSample;
+    afterDataRead = dataRead + numBytes;
+    if (afterDataRead > header.data.data_len) 
+    {
+        // Don't read more samples than are marked available in header
+        numBytes = (int)header.data.data_len - (int)dataRead;
+        assert(numBytes >= 0);
+    }

-    return num;
+    // read raw data into temporary buffer
+    char *temp = (char*)getConvBuffer(numBytes);
+    numBytes = (int)fread(temp, 1, numBytes, fptr);
+    dataRead += numBytes;
+
+    numElems = numBytes / bytesPerSample;
+
+    // swap byte order & convert to float, depending on sample format
+    switch (bytesPerSample)
+    {
+        case 1:
+        {
+            unsigned char *temp2 = (unsigned char*)temp;
+            double conv = 1.0 / 128.0;
+            for (int i = 0; i < numElems; i ++)
+            {
+                buffer[i] = (float)(temp2[i] * conv - 1.0);
+            }
+            break;
+        }
+
+        case 2:
+        {
+            short *temp2 = (short*)temp;
+            double conv = 1.0 / 32768.0;
+            for (int i = 0; i < numElems; i ++)
+            {
+                short value = temp2[i];
+                buffer[i] = (float)(_swap16(value) * conv);
+            }
+            break;
+        }
+
+        case 3:
+        {
+            char *temp2 = (char *)temp;
+            double conv = 1.0 / 8388608.0;
+            for (int i = 0; i < numElems; i ++)
+            {
+                int value = *((int*)temp2);
+                value = _swap32(value) & 0x00ffffff;             // take 24 bits
+                value |= (value & 0x00800000) ? 0xff000000 : 0;  // extend minus sign bits
+                buffer[i] = (float)(value * conv);
+                temp2 += 3;
+            }
+            break;
+        }
+
+        case 4:
+        {
+            int *temp2 = (int *)temp;
+            double conv = 1.0 / 2147483648.0;
+            assert(sizeof(int) == 4);
+            for (int i = 0; i < numElems; i ++)
+            {
+                int value = temp2[i];
+                buffer[i] = (float)(_swap32(value) * conv);
+            }
+            break;
+        }
+    }
+
+    return numElems;
 }


@ -313,13 +462,6 @@ int WavInFile::eof() const
 }


-void WavInFile::close()
-{
-    fclose(fptr);
-    fptr = NULL;
-}
-
-

 // test if character code is between a white space ' ' and little 'z'
 static int isAlpha(char c)
@ -329,9 +471,9 @@ static int isAlpha(char c)


 // test if all characters are between a white space ' ' and little 'z'
-static int isAlphaStr(char *str)
+static int isAlphaStr(const char *str)
 {
-    int c;
+    char c;

    c = str[0];
    while (c) 
@ -347,10 +489,10 @@ static int isAlphaStr(char *str)

 int WavInFile::readRIFFBlock()
 {
-    fread(&(header.riff), sizeof(WavRiff), 1, fptr);
+    if (fread(&(header.riff), sizeof(WavRiff), 1, fptr) != 1) return -1;

    // swap 32bit data byte order if necessary
-    _swap32((unsigned int &)header.riff.package_len);
+    _swap32((int &)header.riff.package_len);

    // header.riff.riff_char should equal to 'RIFF');
    if (memcmp(riffStr, header.riff.riff_char, 4) != 0) return -1;
@ -369,7 +511,7 @@ int WavInFile::readHeaderBlock()
    string sLabel;

    // lead label string
-    fread(label, 1, 4, fptr);
+    if (fread(label, 1, 4, fptr) !=4) return -1;
    label[4] = 0;

    if (isAlphaStr(label) == 0) return -1;    // not a valid label
@ -383,13 +525,13 @@ int WavInFile::readHeaderBlock()
        memcpy(header.format.fmt, fmtStr, 4);

        // read length of the format field
-        fread(&nLen, sizeof(int), 1, fptr);
+        if (fread(&nLen, sizeof(int), 1, fptr) != 1) return -1;
        // swap byte order if necessary
-        _swap32((unsigned int &)nLen); // int format_len;
+        _swap32(nLen); // int format_len;
        header.format.format_len = nLen;

        // calculate how much length differs from expected
-        nDump = nLen - (sizeof(header.format) - 8);
+        nDump = nLen - ((int)sizeof(header.format) - 8);

        // if format_len is larger than expected, read only as much data as we've space for
        if (nDump > 0)
@ -398,15 +540,15 @@ int WavInFile::readHeaderBlock()
        }

        // read data
-        fread(&(header.format.fixed), nLen, 1, fptr);
+        if (fread(&(header.format.fixed), nLen, 1, fptr) != 1) return -1;

        // swap byte order if necessary
-        _swap16((unsigned short &)header.format.fixed);            // short int fixed;
-        _swap16((unsigned short &)header.format.channel_number);   // short int channel_number;
-        _swap32((unsigned int   &)header.format.sample_rate);      // int sample_rate;
-        _swap32((unsigned int   &)header.format.byte_rate);        // int byte_rate;
-        _swap16((unsigned short &)header.format.byte_per_sample);  // short int byte_per_sample;
-        _swap16((unsigned short &)header.format.bits_per_sample);  // short int bits_per_sample;
+        _swap16(header.format.fixed);            // short int fixed;
+        _swap16(header.format.channel_number);   // short int channel_number;
+        _swap32((int &)header.format.sample_rate);      // int sample_rate;
+        _swap32((int &)header.format.byte_rate);        // int byte_rate;
+        _swap16(header.format.byte_per_sample);  // short int byte_per_sample;
+        _swap16(header.format.bits_per_sample);  // short int bits_per_sample;

        // if format_len is larger than expected, skip the extra data
        if (nDump > 0)
@ -420,10 +562,10 @@ int WavInFile::readHeaderBlock()
    {
        // 'data' block
        memcpy(header.data.data_field, dataStr, 4);
-        fread(&(header.data.data_len), sizeof(uint), 1, fptr);
+        if (fread(&(header.data.data_len), sizeof(uint), 1, fptr) != 1) return -1;

        // swap byte order if necessary
-        _swap32((unsigned int &)header.data.data_len);
+        _swap32((int &)header.data.data_len);

        return 1;
    }
@ -434,11 +576,11 @@ int WavInFile::readHeaderBlock()
        // unknown block

        // read length
-        fread(&len, sizeof(len), 1, fptr);
+        if (fread(&len, sizeof(len), 1, fptr) != 1) return -1;
        // scan through the block
        for (i = 0; i < len; i ++)
        {
-            fread(&temp, 1, 1, fptr);
+            if (fread(&temp, 1, 1, fptr) != 1) return -1;
            if (feof(fptr)) return -1;   // unexpected eof
        }
    }
@ -499,23 +641,31 @@ uint WavInFile::getDataSizeInBytes() const

 uint WavInFile::getNumSamples() const
 {
-    return header.data.data_len / header.format.byte_per_sample;
+    if (header.format.byte_per_sample == 0) return 0;
+    return header.data.data_len / (unsigned short)header.format.byte_per_sample;
 }


 uint WavInFile::getLengthMS() const
 {
-   uint numSamples;
-   uint sampleRate;
+    double numSamples;
+    double sampleRate;

-   numSamples = getNumSamples();
-   sampleRate = getSampleRate();
+    numSamples = (double)getNumSamples();
+    sampleRate = (double)getSampleRate();

-   assert(numSamples < UINT_MAX / 1000);
-   return (1000 * numSamples / sampleRate);
+    return (uint)(1000.0 * numSamples / sampleRate + 0.5);
 }


+/// Returns how many milliseconds of audio have so far been read from the file
+uint WavInFile::getElapsedMS() const
+{
+    return (uint)(1000.0 * (double)dataRead / (double)header.format.byte_rate);
+}
+
+
+
 //////////////////////////////////////////////////////////////////////////////
 //
 // Class WavOutFile
@ -531,20 +681,35 @@ WavOutFile::WavOutFile(const char *fileName, int sampleRate, int bits, int chann
        msg += fileName;
        msg += "\" for writing.";
        //pmsg = msg.c_str;
-        throw runtime_error(msg);
+        ST_THROW_RT_ERROR(msg.c_str());
+    }
+
+    fillInHeader(sampleRate, bits, channels);
+    writeHeader();
+}
+
+
+WavOutFile::WavOutFile(FILE *file, int sampleRate, int bits, int channels)
+{
+    bytesWritten = 0;
+    fptr = file;
+    if (fptr == NULL) 
+    {
+        string msg = "Error : Unable to access output file stream.";
+        ST_THROW_RT_ERROR(msg.c_str());
    }

    fillInHeader(sampleRate, bits, channels);
    writeHeader();
-    
-    flushTime = flushRate;
 }



 WavOutFile::~WavOutFile()
 {
-    close();
+    finishHeader();
+    if (fptr) fclose(fptr);
+    fptr = NULL;
 }


@ -560,7 +725,6 @@ void WavOutFile::fillInHeader(uint sampleRate, uint bits, uint channels)
    // copy string 'WAVE' to wave
    memcpy(&(header.riff.wave), waveStr, 4);

-
    // fill in the 'format' part..

    // copy string 'fmt ' to fmt
@ -569,11 +733,11 @@ void WavOutFile::fillInHeader(uint sampleRate, uint bits, uint channels)
    header.format.format_len = 0x10;
    header.format.fixed = 1;
    header.format.channel_number = (short)channels;
-    header.format.sample_rate = sampleRate;
+    header.format.sample_rate = (int)sampleRate;
    header.format.bits_per_sample = (short)bits;
    header.format.byte_per_sample = (short)(bits * channels / 8);
-    header.format.byte_rate = header.format.byte_per_sample * sampleRate;
-    header.format.sample_rate = sampleRate;
+    header.format.byte_rate = header.format.byte_per_sample * (int)sampleRate;
+    header.format.sample_rate = (int)sampleRate;

    // fill in the 'data' part..

@ -598,66 +762,55 @@ void WavOutFile::finishHeader()
 void WavOutFile::writeHeader()
 {
    WavHeader hdrTemp;
+    int res;

    // swap byte order if necessary
    hdrTemp = header;
-    _swap32((unsigned int   &)hdrTemp.riff.package_len);
-    _swap32((unsigned int   &)hdrTemp.format.format_len);
-    _swap16((unsigned short &)hdrTemp.format.fixed);
-    _swap16((unsigned short &)hdrTemp.format.channel_number);
-    _swap32((unsigned int   &)hdrTemp.format.sample_rate);
-    _swap32((unsigned int   &)hdrTemp.format.byte_rate);
-    _swap16((unsigned short &)hdrTemp.format.byte_per_sample);
-    _swap16((unsigned short &)hdrTemp.format.bits_per_sample);
-    _swap32((unsigned int   &)hdrTemp.data.data_len);
+    _swap32((int &)hdrTemp.riff.package_len);
+    _swap32((int &)hdrTemp.format.format_len);
+    _swap16((short &)hdrTemp.format.fixed);
+    _swap16((short &)hdrTemp.format.channel_number);
+    _swap32((int &)hdrTemp.format.sample_rate);
+    _swap32((int &)hdrTemp.format.byte_rate);
+    _swap16((short &)hdrTemp.format.byte_per_sample);
+    _swap16((short &)hdrTemp.format.bits_per_sample);
+    _swap32((int &)hdrTemp.data.data_len);

    // write the supplemented header in the beginning of the file
    fseek(fptr, 0, SEEK_SET);
-    fwrite(&hdrTemp, sizeof(hdrTemp), 1, fptr);
+    res = (int)fwrite(&hdrTemp, sizeof(hdrTemp), 1, fptr);
+    if (res != 1)
+    {
+        ST_THROW_RT_ERROR("Error while writing to a wav file.");
+    }
+
    // jump back to the end of the file
    fseek(fptr, 0, SEEK_END);
 }



-void WavOutFile::close()
-{
-    finishHeader();
-    fclose(fptr);
-    fptr = NULL;
-}
-
-void WavOutFile::flush( int numElems )
-{
-	flushTime -= numElems;
-	if( flushTime < 0 )
-	{
-		flushTime += flushRate;
-		finishHeader();
-	}
-}
-
-void WavOutFile::write(const char *buffer, int numElems)
+void WavOutFile::write(const unsigned char *buffer, int numElems)
 {
    int res;

    if (header.format.bits_per_sample != 8)
    {
-        throw runtime_error("Error: WavOutFile::write(const char*, int) accepts only 8bit samples.");
+        ST_THROW_RT_ERROR("Error: WavOutFile::write(const char*, int) accepts only 8bit samples.");
    }
    assert(sizeof(char) == 1);

-    res = fwrite(buffer, 1, numElems, fptr);
+    res = (int)fwrite(buffer, 1, numElems, fptr);
    if (res != numElems) 
    {
-        throw runtime_error("Error while writing to a wav file.");
+        ST_THROW_RT_ERROR("Error while writing to a wav file.");
    }

    bytesWritten += numElems;
-	flush( numElems );
 }


+
 void WavOutFile::write(const short *buffer, int numElems)
 {
    int res;
@ -665,64 +818,134 @@ void WavOutFile::write(const short *buffer, int numElems)
    // 16 bit samples
    if (numElems < 1) return;   // nothing to do

-    if (header.format.bits_per_sample == 8)
+    switch (header.format.bits_per_sample)
    {
-        int i;
-        char *temp = new char[numElems];
-        // convert from 16bit format to 8bit format
-        for (i = 0; i < numElems; i ++)
+        case 8:
        {
-            temp[i] = buffer[i] >> 8;
+            int i;
+            unsigned char *temp = (unsigned char *)getConvBuffer(numElems);
+            // convert from 16bit format to 8bit format
+            for (i = 0; i < numElems; i ++)
+            {
+                temp[i] = (unsigned char)(buffer[i] / 256 + 128);
+            }
+            // write in 8bit format
+            write(temp, numElems);
+            break;
+        }
+
+        case 16:
+        {
+            // 16bit format
+
+            // use temp buffer to swap byte order if necessary
+            short *pTemp = (short *)getConvBuffer(numElems * sizeof(short));
+            memcpy(pTemp, buffer, numElems * 2);
+            _swap16Buffer(pTemp, numElems);
+
+            res = (int)fwrite(pTemp, 2, numElems, fptr);
+
+            if (res != numElems) 
+            {
+                ST_THROW_RT_ERROR("Error while writing to a wav file.");
+            }
+            bytesWritten += 2 * numElems;
+            break;
+        }
+
+        default:
+        {
+            stringstream ss;
+            ss << "\nOnly 8/16 bit sample WAV files supported in integer compilation. Can't open WAV file with ";
+            ss << (int)header.format.bits_per_sample;
+            ss << " bit sample format. ";
+            ST_THROW_RT_ERROR(ss.str().c_str());
        }
-        // write in 8bit format
-        write(temp, numElems);
-        delete[] temp;
    }
-    else
+}
+
+
+/// Convert from float to integer and saturate
+inline int saturate(float fvalue, float minval, float maxval)
+{
+    if (fvalue > maxval) 
    {
-        // 16bit format
-        unsigned short *pTemp = new unsigned short[numElems];
-
-        assert(header.format.bits_per_sample == 16);
-
-        // allocate temp buffer to swap byte order if necessary
-        memcpy(pTemp, buffer, numElems * 2);
-        _swap16Buffer(pTemp, numElems);
-
-        res = fwrite(pTemp, 2, numElems, fptr);
-
-        delete[] pTemp;
-
-        if (res != numElems) 
-        {
-            throw runtime_error("Error while writing to a wav file.");
-        }
-        bytesWritten += 2 * numElems;
-		flush( numElems*2 );
+        fvalue = maxval;
+    } 
+    else if (fvalue < minval)
+    {
+        fvalue = minval;
    }
+    return (int)fvalue;
 }


 void WavOutFile::write(const float *buffer, int numElems)
 {
-    int i;
-    short *temp = new short[numElems];
-    int iTemp;
+    int numBytes;
+    int bytesPerSample;

-    // convert to 16 bit integer
-    for (i = 0; i < numElems; i ++)
+    if (numElems == 0) return;
+
+    bytesPerSample = header.format.bits_per_sample / 8;
+    numBytes = numElems * bytesPerSample;
+    short *temp = (short*)getConvBuffer(numBytes);
+
+    switch (bytesPerSample)
    {
-        // convert to integer
-        iTemp = (int)(32768.0f * buffer[i]);
+        case 1:
+        {
+            unsigned char *temp2 = (unsigned char *)temp;
+            for (int i = 0; i < numElems; i ++)
+            {
+                temp2[i] = (unsigned char)saturate(buffer[i] * 128.0f + 128.0f, 0.0f, 255.0f);
+            }
+            break;
+        }

-        // saturate
-        if (iTemp < -32768) iTemp = -32768;
-        if (iTemp > 32767)  iTemp = 32767;
-        temp[i] = (short)iTemp;
+        case 2:
+        {
+            short *temp2 = (short *)temp;
+            for (int i = 0; i < numElems; i ++)
+            {
+                short value = (short)saturate(buffer[i] * 32768.0f, -32768.0f, 32767.0f);
+                temp2[i] = _swap16(value);
+            }
+            break;
+        }
+
+        case 3:
+        {
+            char *temp2 = (char *)temp;
+            for (int i = 0; i < numElems; i ++)
+            {
+                int value = saturate(buffer[i] * 8388608.0f, -8388608.0f, 8388607.0f);
+                *((int*)temp2) = _swap32(value);
+                temp2 += 3;
+            }
+            break;
+        }
+
+        case 4:
+        {
+            int *temp2 = (int *)temp;
+            for (int i = 0; i < numElems; i ++)
+            {
+                int value = saturate(buffer[i] * 2147483648.0f, -2147483648.0f, 2147483647.0f);
+                temp2[i] = _swap32(value);
+            }
+            break;
+        }
+
+        default:
+            assert(false);
    }

-    write(temp, numElems);
-	flush( numElems );
+    int res = (int)fwrite(temp, 1, numBytes, fptr);

-    delete[] temp;
+    if (res != numBytes) 
+    {
+        ST_THROW_RT_ERROR("Error while writing to a wav file.");
+    }
+    bytesWritten += numBytes;
 }
--- a/desmume/src/metaspu/SoundTouch/WavFile.h
+++ b/desmume/src/metaspu/SoundTouch/WavFile.h
@ -16,10 +16,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.7 $
+// Last changed  : $Date: 2012-09-01 04:57:22 -0300 (sáb, 01 set 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: WavFile.h,v 1.7 2006/02/05 16:44:06 Olli Exp $
+// $Id: WavFile.h 153 2012-09-01 07:57:22Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -92,26 +92,49 @@ typedef struct
 } WavHeader;


+/// Base class for processing WAV audio files.
+class WavFileBase
+{
+private:
+    /// Conversion working buffer.
+    char *convBuff;
+    int convBuffSize;
+
+protected:
+    WavFileBase();
+    virtual ~WavFileBase();
+
+    /// Get pointer to a conversion buffer of at least the given size in bytes.
+    void *getConvBuffer(int sizeByte);
+};
+
+
 /// Class for reading WAV audio files.
-class WavInFile
+class WavInFile : protected WavFileBase
 {
 private:
    /// File pointer.
    FILE *fptr;

+    /// Position within the audio stream
+    long position;
+
    /// Counter of how many bytes of sample data have been read from the file.
-    uint dataRead;
+    long dataRead;

    /// WAV header information
    WavHeader header;

+    /// Init the WAV file stream
+    void init();
+
    /// Read WAV file headers.
    /// \return zero if all ok, nonzero if file format is invalid.
    int readWavHeaders();

    /// Checks WAV file header tags.
    /// \return zero if all ok, nonzero if file format is invalid.
-    int checkCharTags();
+    int checkCharTags() const;

    /// Reads a single WAV file header block.
    /// \return zero if all ok, nonzero if file format is invalid.
@ -125,13 +148,11 @@ public:
    /// throws 'runtime_error' exception.
    WavInFile(const char *filename);

+    WavInFile(FILE *file);
+
    /// Destructor: Closes the file.
    ~WavInFile();

-    /// Close the file. Notice that file is automatically closed also when the 
-    /// class instance is deleted.
-    void close();
-
    /// Rewind to beginning of the file
    void rewind();

@ -157,12 +178,17 @@ public:
    /// Get the audio file length in milliseconds
    uint getLengthMS() const;

+    /// Returns how many milliseconds of audio have so far been read from the file
+    ///
+    /// \return elapsed duration in milliseconds
+    uint getElapsedMS() const;
+
    /// Reads audio samples from the WAV file. This routine works only for 8 bit samples.
    /// Reads given number of elements from the file or if end-of-file reached, as many 
    /// elements as are left in the file.
    ///
    /// \return Number of 8-bit integers read from the file.
-    int read(char *buffer, int maxElems);
+    int read(unsigned char *buffer, int maxElems);

    /// Reads audio samples from the WAV file to 16 bit integer format. Reads given number 
    /// of elements from the file or if end-of-file reached, as many elements as are 
@ -176,6 +202,7 @@ public:
    /// Reads audio samples from the WAV file to floating point format, converting 
    /// sample values to range [-1,1[. Reads given number of elements from the file
    /// or if end-of-file reached, as many elements as are left in the file.
+    /// Notice that reading in float format supports 8/16/24/32bit sample formats.
    ///
    /// \return Number of elements read from the file.
    int read(float *buffer,     ///< Pointer to buffer where to read data.
@ -191,7 +218,7 @@ public:


 /// Class for writing WAV audio files.
-class WavOutFile
+class WavOutFile : protected WavFileBase
 {
 private:
    /// Pointer to the WAV file
@ -203,9 +230,6 @@ private:
    /// Counter of how many bytes have been written to the file so far.
    int bytesWritten;

-	/// number of bytes to be written before next flush.
-	int flushTime;
-
    /// Fills in WAV file header information.
    void fillInHeader(const uint sampleRate, const uint bits, const uint channels);

@ -216,14 +240,6 @@ private:
    /// Writes the WAV file header.
    void writeHeader();

-	/// Flushes the WAV file every so often -- writes header info for the current
-	/// data length and then returns the seek position to the end of the WAV for
-	/// continued writing.  This method is called from each write() method.
-	void flush( int numElems );
-
-	/// Flush the WAVheader every 32kb written
-	static const int flushRate = 0x8000;
-
 public:
    /// Constructor: Creates a new WAV file. Throws a 'runtime_error' exception 
    /// if file creation fails.
@ -233,13 +249,15 @@ public:
               int channels             ///< Number of channels (1=mono, 2=stereo)
               );

+    WavOutFile(FILE *file, int sampleRate, int bits, int channels);
+
    /// Destructor: Finalizes & closes the WAV file.
    ~WavOutFile();

    /// Write data to WAV file. This function works only with 8bit samples. 
    /// Throws a 'runtime_error' exception if writing to file fails.
-    void write(const char *buffer,     ///< Pointer to sample data buffer.
-               int numElems             ///< How many array items are to be written to file.
+    void write(const unsigned char *buffer, ///< Pointer to sample data buffer.
+               int numElems                 ///< How many array items are to be written to file.
               );

    /// Write data to WAV file. Throws a 'runtime_error' exception if writing to
@ -253,12 +271,6 @@ public:
    void write(const float *buffer,     ///< Pointer to sample data buffer.
               int numElems             ///< How many array items are to be written to file.
               );
-
-    /// Finalize & close the WAV file. Automatically supplements the WAV file header
-    /// information according to written data etc.
-    ///
-    /// Notice that file is automatically closed also when the class instance is deleted.
-    void close();
 };

 #endif
--- a/desmume/src/metaspu/SoundTouch/mmx_optimized.cpp
+++ b/desmume/src/metaspu/SoundTouch/mmx_optimized.cpp
@ -12,7 +12,7 @@
 /// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++ 
 /// 6.0 processor pack" update to support compiler intrinsic syntax. The update
 /// is available for download at Microsoft Developers Network, see here:
-/// http://msdn.microsoft.com/vstudio/downloads/tools/ppack/default.aspx
+/// http://msdn.microsoft.com/en-us/vstudio/aa718349.aspx
 ///
 /// Author        : Copyright (c) Olli Parviainen
 /// Author e-mail : oparviai 'at' iki.fi
@ -20,10 +20,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/06 18:52:43 $
-// File revision : $Revision: 1.1 $
+// Last changed  : $Date: 2012-11-08 16:53:01 -0200 (qui, 08 nov 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: mmx_optimized.cpp,v 1.1 2006/02/06 18:52:43 Olli Exp $
+// $Id: mmx_optimized.cpp 160 2012-11-08 18:53:01Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -50,13 +50,9 @@

 #include "STTypes.h"

-#ifdef ALLOW_MMX
+#ifdef SOUNDTOUCH_ALLOW_MMX
 // MMX routines available only with integer sample type

-#if !(_WIN32 || __i386__ || __x86_64__)
-#error "wrong platform - this source code file is exclusively for x86 platforms"
-#endif
-
 using namespace soundtouch;

 //////////////////////////////////////////////////////////////////////////////
@ -68,28 +64,29 @@ using namespace soundtouch;
 #include "TDStretch.h"
 #include <mmintrin.h>
 #include <limits.h>
+#include <math.h>


 // Calculates cross correlation of two buffers
-long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
+double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2) const
 {
    const __m64 *pVec1, *pVec2;
    __m64 shifter;
-    __m64 accu;
-    long corr;
-    uint i;
+    __m64 accu, normaccu;
+    long corr, norm;
+    int i;
   
    pVec1 = (__m64*)pV1;
    pVec2 = (__m64*)pV2;

    shifter = _m_from_int(overlapDividerBits);
-    accu = _mm_setzero_si64();
+    normaccu = accu = _mm_setzero_si64();

-    // Process 4 parallel sets of 2 * stereo samples each during each 
-    // round to improve CPU-level parallellization.
-    for (i = 0; i < overlapLength / 8; i ++)
+    // Process 4 parallel sets of 2 * stereo samples or 4 * mono samples 
+    // during each round for improved CPU-level parallelization.
+    for (i = 0; i < channels * overlapLength / 16; i ++)
    {
-        __m64 temp;
+        __m64 temp, temp2;

        // dictionary of instructions:
        // _m_pmaddwd   : 4*16bit multiply-add, resulting two 32bits = [a0*b0+a1*b1 ; a2*b2+a3*b3]
@ -98,11 +95,17 @@ long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const

        temp = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]),
                            _mm_madd_pi16(pVec1[1], pVec2[1]));
+        temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]),
+                             _mm_madd_pi16(pVec1[1], pVec1[1]));
        accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
+        normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));

        temp = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]),
                            _mm_madd_pi16(pVec1[3], pVec2[3]));
+        temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]),
+                             _mm_madd_pi16(pVec1[3], pVec1[3]));
        accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
+        normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));

        pVec1 += 4;
        pVec2 += 4;
@ -114,10 +117,17 @@ long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
    accu = _mm_add_pi32(accu, _mm_srli_si64(accu, 32));
    corr = _m_to_int(accu);

+    normaccu = _mm_add_pi32(normaccu, _mm_srli_si64(normaccu, 32));
+    norm = _m_to_int(normaccu);
+
    // Clear MMX state
    _m_empty();

-    return corr;
+    // Normalize result by dividing by sqrt(norm) - this step is most easily
+    // done using floating-point operations
+    if (norm == 0) norm = 1;    // to avoid div by zero
+
+    return (double)corr / sqrt((double)norm);
    // Note: Warning about the missing EMMS instruction is harmless
    // as it'll be called elsewhere.
 }
@ -139,7 +149,7 @@ void TDStretchMMX::overlapStereo(short *output, const short *input) const
    const __m64 *pVinput, *pVMidBuf;
    __m64 *pVdest;
    __m64 mix1, mix2, adder, shifter;
-    uint i;
+    int i;

    pVinput  = (const __m64*)input;
    pVMidBuf = (const __m64*)pMidBuffer;
@ -154,7 +164,9 @@ void TDStretchMMX::overlapStereo(short *output, const short *input) const
    mix2  = _mm_add_pi16(mix1, adder);
    adder = _mm_add_pi16(adder, adder);

-    shifter = _m_from_int(overlapDividerBits);
+    // Overlap-length division by shifter. "+1" is to account for the "-1" deducted in
+    // overlapDividerBits calculation earlier.
+    shifter = _m_from_int(overlapDividerBits + 1);

    for (i = 0; i < overlapLength / 4; i ++)
    {
@ -227,7 +239,7 @@ void FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uRe
    // Ensure that filter coeffs array is aligned to 16-byte boundary
    delete[] filterCoeffsUnalign;
    filterCoeffsUnalign = new short[2 * newLength + 8];
-    filterCoeffsAlign = (short *)(((ulongptr)filterCoeffsUnalign + 15) & -16);
+    filterCoeffsAlign = (short *)SOUNDTOUCH_ALIGN_POINTER_16(filterCoeffsUnalign);

    // rearrange the filter coefficients for mmx routines 
    for (i = 0;i < length; i += 4) 
@ -247,7 +259,7 @@ void FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uRe


 // mmx-optimized version of the filter routine for stereo sound
-uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, const uint numSamples) const
+uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numSamples) const
 {
    // Create stack copies of the needed member variables for asm routines :
    uint i, j;
@ -255,7 +267,7 @@ uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, const uin

    if (length < 2) return 0;

-    for (i = 0; i < numSamples / 2; i ++)
+    for (i = 0; i < (numSamples - length) / 2; i ++)
    {
        __m64 accu1;
        __m64 accu2;
@ -302,4 +314,4 @@ uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, const uin
    return (numSamples & 0xfffffffe) - length;
 }

-#endif  // ALLOW_MMX
+#endif  // SOUNDTOUCH_ALLOW_MMX
--- a/desmume/src/metaspu/SoundTouch/sse_optimized.cpp
+++ b/desmume/src/metaspu/SoundTouch/sse_optimized.cpp
@ -12,7 +12,7 @@
 /// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++ 
 /// 6.0 processor pack" update to support SSE instruction set. The update is 
 /// available for download at Microsoft Developers Network, see here:
-/// http://msdn.microsoft.com/vstudio/downloads/tools/ppack/default.aspx
+/// http://msdn.microsoft.com/en-us/vstudio/aa718349.aspx
 ///
 /// If the above URL is expired or removed, go to "http://msdn.microsoft.com" and 
 /// perform a search with keywords "processor pack".
@ -23,10 +23,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2006/02/05 16:44:06 $
-// File revision : $Revision: 1.2 $
+// Last changed  : $Date: 2012-11-08 16:53:01 -0200 (qui, 08 nov 2012) $
+// File revision : $Revision: 4 $
 //
-// $Id: sse_optimized.cpp,v 1.2 2006/02/05 16:44:06 Olli Exp $
+// $Id: sse_optimized.cpp 160 2012-11-08 18:53:01Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@ -56,7 +56,7 @@

 using namespace soundtouch;

-#ifdef ALLOW_SSE
+#ifdef SOUNDTOUCH_ALLOW_SSE

 // SSE routines available only with float sample type    

@ -68,12 +68,15 @@ using namespace soundtouch;

 #include "TDStretch.h"
 #include <xmmintrin.h>
+#include <math.h>

 // Calculates cross correlation of two buffers
-double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) const
+double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2) const
 {
-    uint i;
-    __m128 vSum, *pVec2;
+    int i;
+    const float *pVec1;
+    const __m128 *pVec2;
+    __m128 vSum, vNorm;

    // Note. It means a major slow-down if the routine needs to tolerate 
    // unaligned __m128 memory accesses. It's way faster if we can skip 
@ -81,16 +84,16 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con
    // This can mean up to ~ 10-fold difference (incl. part of which is
    // due to skipping every second round for stereo sound though).
    //
-    // Compile-time define ALLOW_NONEXACT_SIMD_OPTIMIZATION is provided
+    // Compile-time define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION is provided
    // for choosing if this little cheating is allowed.

-#ifdef ALLOW_NONEXACT_SIMD_OPTIMIZATION
+#ifdef SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
    // Little cheating allowed, return valid correlation only for 
    // aligned locations, meaning every second round for stereo sound.

    #define _MM_LOAD    _mm_load_ps

-    if (((ulong)pV1) & 15) return -1e50;    // skip unaligned locations
+    if (((ulongptr)pV1) & 15) return -1e50;    // skip unaligned locations

 #else
    // No cheating allowed, use unaligned load & take the resulting
@ -103,39 +106,54 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con

    // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
    // Note: pV2 _must_ be aligned to 16-byte boundary, pV1 need not.
-    pVec2 = (__m128*)pV2;
-    vSum = _mm_setzero_ps();
+    pVec1 = (const float*)pV1;
+    pVec2 = (const __m128*)pV2;
+    vSum = vNorm = _mm_setzero_ps();

-    // Unroll the loop by factor of 4 * 4 operations
-    for (i = 0; i < overlapLength / 8; i ++) 
+    // Unroll the loop by factor of 4 * 4 operations. Use same routine for
+    // stereo & mono, for mono it just means twice the amount of unrolling.
+    for (i = 0; i < channels * overlapLength / 16; i ++) 
    {
+        __m128 vTemp;
        // vSum += pV1[0..3] * pV2[0..3]
-        vSum = _mm_add_ps(vSum, _mm_mul_ps(_MM_LOAD(pV1),pVec2[0]));
+        vTemp = _MM_LOAD(pVec1);
+        vSum  = _mm_add_ps(vSum,  _mm_mul_ps(vTemp ,pVec2[0]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));

        // vSum += pV1[4..7] * pV2[4..7]
-        vSum = _mm_add_ps(vSum, _mm_mul_ps(_MM_LOAD(pV1 + 4), pVec2[1]));
+        vTemp = _MM_LOAD(pVec1 + 4);
+        vSum  = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[1]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));

        // vSum += pV1[8..11] * pV2[8..11]
-        vSum = _mm_add_ps(vSum, _mm_mul_ps(_MM_LOAD(pV1 + 8), pVec2[2]));
+        vTemp = _MM_LOAD(pVec1 + 8);
+        vSum  = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[2]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));

        // vSum += pV1[12..15] * pV2[12..15]
-        vSum = _mm_add_ps(vSum, _mm_mul_ps(_MM_LOAD(pV1 + 12), pVec2[3]));
+        vTemp = _MM_LOAD(pVec1 + 12);
+        vSum  = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[3]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));

-        pV1 += 16;
+        pVec1 += 16;
        pVec2 += 4;
    }

    // return value = vSum[0] + vSum[1] + vSum[2] + vSum[3]
-    float *pvSum = (float*)&vSum;
-    return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]);
+    float *pvNorm = (float*)&vNorm;
+    double norm = sqrt(pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
+    if (norm < 1e-9) norm = 1.0;    // to avoid div by zero

-    /* This is approximately corresponding routine in C-language:
-    double corr;
+    float *pvSum = (float*)&vSum;
+    return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]) / norm;
+
+    /* This is an approximately corresponding routine in C-language, including normalization:
+    double corr, norm;
    uint i;

    // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
-    corr = 0.0;
-    for (i = 0; i < overlapLength / 8; i ++) 
+    corr = norm = 0.0;
+    for (i = 0; i < channels * overlapLength / 16; i ++) 
    {
        corr += pV1[0] * pV2[0] +
                pV1[1] * pV2[1] +
@ -154,77 +172,12 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con
                pV1[14] * pV2[14] +
                pV1[15] * pV2[15];

+    for (j = 0; j < 16; j ++) norm += pV1[j] * pV1[j];
+
        pV1 += 16;
        pV2 += 16;
    }
-    */
-
-    /* This is corresponding routine in assembler. This may be teeny-weeny bit faster
-       than intrinsic version, but more difficult to maintain & get compiled on multiple
-       platforms.
-
-    uint overlapLengthLocal = overlapLength;
-    float corr;
-
-    _asm 
-    {
-        // Very important note: data in 'pV2' _must_ be aligned to 
-        // 16-byte boundary!
-
-        // give prefetch hints to CPU of what data are to be needed soonish
-        // give more aggressive hints on pV1 as that changes while pV2 stays
-        // same between runs
-        prefetcht0 [pV1]
-        prefetcht0 [pV2]
-        prefetcht0 [pV1 + 32]
-
-        mov     eax, dword ptr pV1
-        mov     ebx, dword ptr pV2
-
-        xorps   xmm0, xmm0
-
-        mov     ecx, overlapLengthLocal
-        shr     ecx, 3  // div by eight
-
-    loop1:
-        prefetcht0 [eax + 64]     // give a prefetch hint to CPU what data are to be needed soonish
-        prefetcht0 [ebx + 32]     // give a prefetch hint to CPU what data are to be needed soonish
-        movups  xmm1, [eax]
-        mulps   xmm1, [ebx]
-        addps   xmm0, xmm1
-
-        movups  xmm2, [eax + 16]
-        mulps   xmm2, [ebx + 16]
-        addps   xmm0, xmm2
-
-        prefetcht0 [eax + 96]     // give a prefetch hint to CPU what data are to be needed soonish
-        prefetcht0 [ebx + 64]     // give a prefetch hint to CPU what data are to be needed soonish
-
-        movups  xmm3, [eax + 32]
-        mulps   xmm3, [ebx + 32]
-        addps   xmm0, xmm3
-
-        movups  xmm4, [eax + 48]
-        mulps   xmm4, [ebx + 48]
-        addps   xmm0, xmm4
-
-        add     eax, 64
-        add     ebx, 64
-
-        dec     ecx
-        jnz     loop1
-
-        // add the four floats of xmm0 together and return the result. 
-
-        movhlps xmm1, xmm0          // move 3 & 4 of xmm0 to 1 & 2 of xmm1
-        addps   xmm1, xmm0
-        movaps  xmm2, xmm1
-        shufps  xmm2, xmm2, 0x01    // move 2 of xmm2 as 1 of xmm2
-        addss   xmm2, xmm1
-        movss   corr, xmm2
-    }
-
-    return (double)corr;
+    return corr / sqrt(norm);
    */
 }

@ -239,6 +192,7 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con

 FIRFilterSSE::FIRFilterSSE() : FIRFilter()
 {
+    filterCoeffsAlign = NULL;
    filterCoeffsUnalign = NULL;
 }

@ -246,6 +200,8 @@ FIRFilterSSE::FIRFilterSSE() : FIRFilter()
 FIRFilterSSE::~FIRFilterSSE()
 {
    delete[] filterCoeffsUnalign;
+    filterCoeffsAlign = NULL;
+    filterCoeffsUnalign = NULL;
 }


@ -258,11 +214,11 @@ void FIRFilterSSE::setCoefficients(const float *coeffs, uint newLength, uint uRe
    FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor);

    // Scale the filter coefficients so that it won't be necessary to scale the filtering result
-    // also rearrange coefficients suitably for 3DNow!
+    // also rearrange coefficients suitably for SSE
    // Ensure that filter coeffs array is aligned to 16-byte boundary
    delete[] filterCoeffsUnalign;
    filterCoeffsUnalign = new float[2 * newLength + 4];
-    filterCoeffsAlign = (float *)(((unsigned long)filterCoeffsUnalign + 15) & -16);
+    filterCoeffsAlign = (float *)SOUNDTOUCH_ALIGN_POINTER_16(filterCoeffsUnalign);

    fDivider = (float)resultDivider;

@ -279,15 +235,18 @@ void FIRFilterSSE::setCoefficients(const float *coeffs, uint newLength, uint uRe
 // SSE-optimized version of the filter routine for stereo sound
 uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint numSamples) const
 {
-    int count = (numSamples - length) & -2;
+    int count = (int)((numSamples - length) & (uint)-2);
    int j;

    assert(count % 2 == 0);

    if (count < 2) return 0;

+    assert(source != NULL);
+    assert(dest != NULL);
    assert((length % 8) == 0);
-    assert(((unsigned long)filterCoeffsAlign) % 16 == 0);
+    assert(filterCoeffsAlign != NULL);
+    assert(((ulongptr)filterCoeffsAlign) % 16 == 0);

    // filter is evaluated for two stereo samples with each iteration, thus use of 'j += 2'
    for (j = 0; j < count; j += 2)
@ -297,9 +256,9 @@ uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint n
        __m128 sum1, sum2;
        uint i;

-        pSrc = source;                      // source audio data
-        pFil = (__m128*)filterCoeffsAlign;  // filter coefficients. NOTE: Assumes coefficients 
-                                            // are aligned to 16-byte boundary
+        pSrc = (const float*)source;              // source audio data
+        pFil = (const __m128*)filterCoeffsAlign;  // filter coefficients. NOTE: Assumes coefficients 
+                                                  // are aligned to 16-byte boundary
        sum1 = sum2 = _mm_setzero_ps();

        for (i = 0; i < length / 8; i ++) 
@ -397,88 +356,6 @@ uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint n
        dest += 4;
    }
    */
-
-
-    /* Similar routine in assembly, again obsoleted due to maintainability
-    _asm
-    {
-        // Very important note: data in 'src' _must_ be aligned to 
-        // 16-byte boundary!
-        mov     edx, count
-        mov     ebx, dword ptr src
-        mov     eax, dword ptr dest
-        shr     edx, 1
-
-    loop1:
-        // "outer loop" : during each round 2*2 output samples are calculated
-
-        // give prefetch hints to CPU of what data are to be needed soonish
-        prefetcht0 [ebx]
-        prefetcht0 [filterCoeffsLocal]
-
-        mov     esi, ebx
-        mov     edi, filterCoeffsLocal
-        xorps   xmm0, xmm0
-        xorps   xmm1, xmm1
-        mov     ecx, lengthLocal
-
-    loop2:
-        // "inner loop" : during each round eight FIR filter taps are evaluated for 2*2 samples
-        prefetcht0 [esi + 32]     // give a prefetch hint to CPU what data are to be needed soonish
-        prefetcht0 [edi + 32]     // give a prefetch hint to CPU what data are to be needed soonish
-
-        movups  xmm2, [esi]         // possibly unaligned load
-        movups  xmm3, [esi + 8]     // possibly unaligned load
-        mulps   xmm2, [edi]
-        mulps   xmm3, [edi]
-        addps   xmm0, xmm2
-        addps   xmm1, xmm3
-
-        movups  xmm4, [esi + 16]    // possibly unaligned load
-        movups  xmm5, [esi + 24]    // possibly unaligned load
-        mulps   xmm4, [edi + 16]
-        mulps   xmm5, [edi + 16]
-        addps   xmm0, xmm4
-        addps   xmm1, xmm5
-
-        prefetcht0 [esi + 64]     // give a prefetch hint to CPU what data are to be needed soonish
-        prefetcht0 [edi + 64]     // give a prefetch hint to CPU what data are to be needed soonish
-
-        movups  xmm6, [esi + 32]    // possibly unaligned load
-        movups  xmm7, [esi + 40]    // possibly unaligned load
-        mulps   xmm6, [edi + 32]
-        mulps   xmm7, [edi + 32]
-        addps   xmm0, xmm6
-        addps   xmm1, xmm7
-
-        movups  xmm4, [esi + 48]    // possibly unaligned load
-        movups  xmm5, [esi + 56]    // possibly unaligned load
-        mulps   xmm4, [edi + 48]
-        mulps   xmm5, [edi + 48]
-        addps   xmm0, xmm4
-        addps   xmm1, xmm5
-
-        add     esi, 64
-        add     edi, 64
-        dec     ecx
-        jnz     loop2
-
-        // Now xmm0 and xmm1 both have a filtered 2-channel sample each, but we still need
-        // to sum the two hi- and lo-floats of these registers together.
-
-        movhlps xmm2, xmm0          // xmm2 = xmm2_3 xmm2_2 xmm0_3 xmm0_2
-        movlhps xmm2, xmm1          // xmm2 = xmm1_1 xmm1_0 xmm0_3 xmm0_2
-        shufps  xmm0, xmm1, 0xe4    // xmm0 = xmm1_3 xmm1_2 xmm0_1 xmm0_0
-        addps   xmm0, xmm2
-
-        movaps  [eax], xmm0
-        add     ebx, 16
-        add     eax, 16
-
-        dec     edx
-        jnz     loop1
-    }
-    */
 }

-#endif  // ALLOW_SSE
+#endif  // SOUNDTOUCH_ALLOW_SSE