pcsx2/plugins/spu2-x/src/Timestretcher.cpp

545 lines
20 KiB
C++

/* SPU2-X, A plugin for Emulating the Sound Processing Unit of the Playstation 2
* Developed and maintained by the Pcsx2 Development Team.
*
* Original portions from SPU2ghz are (c) 2008 by David Quintana [gigaherz]
*
* SPU2-X is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* SPU2-X is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with SPU2-X. If not, see <http://www.gnu.org/licenses/>.
*/
#include "Global.h"
#include "soundtouch/SoundTouch.h"
#include <wx/datetime.h>
#include <algorithm>
//Uncomment the next line to use the old time stretcher
//#define SPU2X_USE_OLD_STRETCHER
// The single SoundTouch instance used by this file. Allocated in
// soundtouchInit(), released in soundtouchCleanup(); NULL-checked only in
// soundtouchClearContents().
static soundtouch::SoundTouch* pSoundTouch = NULL;
// data prediction amount, used to "commit" data that hasn't
// finished timestretch processing. Grown by PredictDataWrite() and added to
// the buffered sample count inside GetStatusPct().
s32 SndBuffer::m_predictData;
// Buffer status recorded by the most recent tempo update, i.e. the last value
// returned by GetStatusPct() (0 = nominal fill, negative = below nominal,
// positive = above nominal).
float SndBuffer::lastPct;
// Smoothed emergency adjustment produced by the previous
// UpdateTempoChangeSoundTouch() call; blended (25%) into the next one.
float SndBuffer::lastEmergencyAdj;
// cTempo: slowly adapting base tempo maintained by UpdateTempoChangeSoundTouch().
// eTempo: effective tempo that function last handed to SoundTouch; also used by
//         timeStretchWrite() to scale the predicted sample count.
// Both are nudged by timeStretchUnderrun()/timeStretchOverrun().
float SndBuffer::cTempo = 1;
float SndBuffer::eTempo = 1;
// Adds 'samples' to the running prediction counter (m_predictData), which
// GetStatusPct() counts as if it were already sitting in the buffer.
void SndBuffer::PredictDataWrite( int samples )
{
	m_predictData = m_predictData + samples;
}
// Calculate the buffer status as a signed ratio around the nominal fill level.
// The nominal level is m_size/16 samples; the value returned is
//   (buffered + predicted - driverEmpty - nominal) / nominal
// so that:
//    1.0 = twice the nominal amount queued (treated as overflow)
//    0.0 = exactly the nominal amount queued
//   -1.0 = nothing queued (treated as underflow)
float SndBuffer::GetStatusPct()
{
	// The output driver's own free space is subtracted so that the figure
	// reflects the whole output chain, not just our ring buffer.
	const int driverFree = mods[OutputModule]->GetEmptySampleCount(); // / 2;
	//ConLog( "Data %d >>> driver: %d predict: %d\n", m_data, driverFree, m_predictData );
	const int buffered = _GetApproximateDataInBuffer();

	const float centered = (float)( buffered + m_predictData - driverFree ) - (m_size/16);
	return centered / (m_size/16);
}
//Alternative simple tempo adjustment. Based only on the soundtouch buffer state.
//Base algorithm: aim at specific average number of samples at the buffer (by GUI), and adjust tempo simply by current/target.
//An extra mechanism is added to keep adjustment at perfect 1:1 ratio (when emulation speed is stable around 100%)
// to prevent constant stretching/shrinking of packets if possible.
// This mechanism is triggered when the adjustment is close to 1:1 for long enough (defaults to 50 iterations within hys_ok_factor - defaults to 4%).
// 1:1 state is aborted when required adjustment goes beyond hys_bad_factor (defaults to 20%).
//
//To compensate for wide variation of the <num of samples> ratio due to relatively small size of the buffer,
// the required tempo is a running average of the last AVERAGING_WINDOW (defaults to 50) calculations.
// This averaging slows down the response time of the algorithm, but greatly stabilizes it towards steady stretching.
//
//Keeping the buffer at required latency:
// This algorithm stabilises when the actual latency is <speed>*<required_latency>. While this is just fine at 100% speed,
// it's problematic especially for slow speeds, as the number of actual samples at the buffer gets very small in that case,
// which may lead to underruns (or just too much latency when running very fast).
//To compensate for that, the algorithm has a slowly moving compensation factor which will eventually bring the actual latency to the required one.
//compensationDivider defines how slow this compensation changes. By default it's set to 100,
// which will finalize the compensation after about 200 iterations.
//
// Note, this algorithm is intentionally simplified by not taking extreme actions at extreme scenarios (mostly underruns when speed drops sharply),
// and lets the overrun/underrun protections do what they should (doesn't happen much though in practice, even at big FPS variations).
//
// These params were tested to show good response and stability, on all audio systems (dsound, wav, port audio, xaudio2),
// even at extreme small latency of 50ms which can handle 50%-100% variations without audible glitches.
// Number of stretcher iterations per second the tuning constants below are
// scaled for. The averaging window, the hysteresis count and the compensation
// divider are all derived from it.
int targetIPS=750;
//Dynamic tuning changes the values of the base algorithm parameters (derived from targetIPS) to adapt, in real time, to
// different number of invocations/sec (mostly affects number of iterations to average).
// Dynamic tuning can have a slight negative effect on the behavior of the algorithm, so it's preferred to have it off.
//Currently it looks like it's around 750/sec on all systems when playing at 100% speed (50/60fps),
// and proportional to that speed otherwise.
//If changes are made to SPU2X which affect this number (but it's still the same on all systems), then just change targetIPS.
//If we find out that some systems are very different, we can turn on dynamic tuning by uncommenting the next line.
//#define NEWSTRETCHER_USE_DYNAMIC_TUNING
//Additional performance note: since AVERAGING_BUFFER_SIZE = 256 (a power of 2), the '%' below
//could be replaced with a faster '&'. The compiler is highly likely to do it since all the values are unsigned.
// Capacity of the circular history used by addToAvg().
#define AVERAGING_BUFFER_SIZE 256U
// How many of the most recent history entries addToAvg() averages (50 at 750 IPS).
unsigned int AVERAGING_WINDOW = 50.0 * targetIPS/750;
// Once gRequestStretcherReset reaches this value the stretcher state is reset.
#define STRETCHER_RESET_THRESHOLD 5
// Bumped by timeStretchUnderrun()/timeStretchOverrun(); starts at the threshold
// so the very first iteration performs a reset.
int gRequestStretcherReset = STRETCHER_RESET_THRESHOLD;
// Pushes 'val' into a circular history and returns the mean of the newest
// min(entries stored, AVERAGING_WINDOW) entries.
//
// While a stretcher reset is pending (gRequestStretcherReset at or above
// STRETCHER_RESET_THRESHOLD) the stored-entry count is cleared first, so the
// result is just 'val'. A mean of exactly zero is reported as 1, because 1 is
// the "100% perfect speed" value.
//
// The mean is recomputed from scratch on every call (O(window)). A running sum
// would make it O(1), but this is not hot code and the bookkeeping (window
// size changes, the overwritten slot) is not worth it unless proven otherwise.
float addToAvg(float val){
	static float        history[AVERAGING_BUFFER_SIZE];
	static unsigned int writePos = 0;
	static unsigned int stored   = 0; // never average more entries than were actually inserted

	const bool resetPending = (gRequestStretcherReset >= STRETCHER_RESET_THRESHOLD);
	if (resetPending)
		stored = 0;
	if (stored < AVERAGING_BUFFER_SIZE)
		++stored;

	history[writePos] = val;
	writePos = (writePos + 1U) % AVERAGING_BUFFER_SIZE;

	// Oldest entry of the window; adding the buffer size keeps the unsigned
	// subtraction from wrapping before the modulo.
	const unsigned int span = std::min(stored, AVERAGING_WINDOW);
	unsigned int idx = (writePos - span + AVERAGING_BUFFER_SIZE) % AVERAGING_BUFFER_SIZE;

	// Accumulate oldest -> newest.
	float total = 0;
	for (unsigned int remaining = span; remaining != 0; --remaining) {
		total += history[idx];
		idx = (idx + 1U) % AVERAGING_BUFFER_SIZE;
	}

	const float mean = total / span;
	if (mean == 0)
		return 1; // 1 because that's the 100% perfect speed value
	return mean;
}
// True when 'val' lies in the closed interval [min, max]; both bounds are
// inclusive. Only operator<= is required of T.
template <class T>
bool IsInRange(const T& val, const T& min, const T& max)
{
	if( !(min <= val) )
		return false;
	return val <= max;
}
// Actual stretch algorithm implementation (the stretcher used by
// timeStretchWrite() unless SPU2X_USE_OLD_STRETCHER is defined).
//
// Each call:
//  1. computes the ratio of samples currently buffered to a moving target,
//  2. smooths that ratio through addToAvg() and dampens it with sqrt(),
//  3. slowly drifts the moving target (dynamicTargetFullness) so the real
//     latency converges on the configured one,
//  4. forces the tempo to exactly 1.0 while the hysteresis "1:1" state holds,
//  5. passes the resulting tempo to SoundTouch.
//
// State kept between calls lives in function-local statics and is
// re-initialized whenever gRequestStretcherReset has reached
// STRETCHER_RESET_THRESHOLD; the counter is cleared at the end of that call.
void SndBuffer::UpdateTempoChangeSoundTouch2()
{
// Target amount of buffered audio, in samples, for the configured latency.
long targetSamplesReservoir=48*SndOutLatencyMS;//48000*SndOutLatencyMS/1000
// Base target, expressed in samples (the division by m_size that would turn
// it into a fraction of the buffer is commented out).
float baseTargetFullness=(double)targetSamplesReservoir;///(double)m_size;//0.05;
//state vars
static bool inside_hysteresis;//=false;
static int hys_ok_count;//=0;
static float dynamicTargetFullness;//=baseTargetFullness;
// A pending reset drops the 1:1 state and snaps the moving target back to base.
if (gRequestStretcherReset >= STRETCHER_RESET_THRESHOLD) {
ConLog("______> stretch: Reset.\n");
inside_hysteresis=false;
hys_ok_count=0;
dynamicTargetFullness=baseTargetFullness;
}
int data = _GetApproximateDataInBuffer();
// Current fill, also in samples (same unit as the targets above).
float bufferFullness=(float)data;///(float)m_size;
#ifdef NEWSTRETCHER_USE_DYNAMIC_TUNING
{//test current iterations/sec every 0.5s, and change algo params accordingly if different than previous IPS more than 30%
static long iters=0;
static wxDateTime last=wxDateTime::UNow();
wxDateTime unow=wxDateTime::UNow();
wxTimeSpan delta = unow.Subtract(last);
if( delta.GetMilliseconds()>500 ){
// Measured iterations per second over the elapsed interval.
int pot_targetIPS=1000.0/delta.GetMilliseconds().ToDouble()*iters;
if(!IsInRange(pot_targetIPS, int((float)targetIPS/1.3f), int((float)targetIPS*1.3f)) ){
if(MsgOverruns()) ConLog("Stretcher: setting iters/sec from %d to %d\n", targetIPS, pot_targetIPS);
targetIPS=pot_targetIPS;
// Rescale the averaging window, never exceeding the history buffer capacity.
AVERAGING_WINDOW=GetClamped((int)(50.0f *(float)targetIPS/750.0f), 3, (int)AVERAGING_BUFFER_SIZE);
}
last=unow;
iters=0;
}
iters++;
}
#endif
//Algorithm params: (threshold params (hysteresis), etc)
// Enter 1:1 mode once the tempo stays within [1/hys_ok_factor, hys_ok_factor];
// leave it when the tempo falls outside [1/hys_bad_factor, hys_bad_factor].
const float hys_ok_factor = 1.04f;
const float hys_bad_factor = 1.2f;
int hys_min_ok_count = GetClamped((int)(50.0 *(float)targetIPS/750.0), 2, 100); //consecutive iterations within hys_ok before going to 1:1 mode
// The moving target closes 1/compensationDivider of its remaining gap per call.
int compensationDivider = GetClamped((int)(100.0 *(float)targetIPS/750), 15, 150);
// Raw request: how full we are relative to where we currently aim to be.
float tempoAdjust=bufferFullness/dynamicTargetFullness;
float avgerage = addToAvg(tempoAdjust);
tempoAdjust = avgerage;
// Dampen the adjustment to avoid overshoots (this means the average will compensate to the other side).
// This is different than simply bigger averaging window since bigger window also has bigger "momentum",
// so it's slower to slow down when it gets close to the equilibrium state and can therefore resonate.
// The dampening (sqrt was chosen for no very good reason) manages to mostly prevent that.
tempoAdjust = sqrt(tempoAdjust);
tempoAdjust = GetClamped( tempoAdjust, 0.05f, 10.0f);
// Note: this enlarges the local base target for the rest of this call only.
if (tempoAdjust < 1)
baseTargetFullness /= sqrt(tempoAdjust); // slightly increase latency when running slow.
// Drift the moving target toward base/tempo so the actual latency converges
// on the requested one.
dynamicTargetFullness += (baseTargetFullness/tempoAdjust - dynamicTargetFullness)/(double)compensationDivider;
// When both the tempo and the moving target are within 10% of nominal, snap
// the moving target back to the base value.
if( IsInRange(tempoAdjust, 0.9f, 1.1f) && IsInRange( dynamicTargetFullness, baseTargetFullness*0.9f, baseTargetFullness*1.1f) )
dynamicTargetFullness=baseTargetFullness;
if( !inside_hysteresis )
{
// Count consecutive "close enough to 1:1" iterations; any miss restarts the count.
if( IsInRange( tempoAdjust, 1.0f/hys_ok_factor, hys_ok_factor ) )
hys_ok_count++;
else
hys_ok_count=0;
if( hys_ok_count >= hys_min_ok_count ){
inside_hysteresis=true;
if(MsgOverruns()) ConLog("======> stretch: None (1:1)\n");
}
}
else if( !IsInRange( tempoAdjust, 1.0f/hys_bad_factor, hys_bad_factor ) ){
// Drifted too far while in 1:1 mode: resume dynamic stretching.
if(MsgOverruns()) ConLog("~~~~~~> stretch: Dynamic\n");
inside_hysteresis=false;
hys_ok_count=0;
}
// While in 1:1 mode no stretching is applied at all.
if(inside_hysteresis)
tempoAdjust=1.0;
if(MsgOverruns()){
// Diagnostics only: iteration counter and timestamp persist across calls.
static int iters=0;
static wxDateTime last=wxDateTime::UNow();
wxDateTime unow=wxDateTime::UNow();
wxTimeSpan delta = unow.Subtract(last);
if(delta.GetMilliseconds()>1000){//report buffers state and tempo adjust every second
ConLog("buffers: %4d ms (%3.0f%%), tempo: %f, comp: %2.3f, iters: %d, (N-IPS:%d -> avg:%d, minokc:%d, div:%d) reset:%d\n",
(int)(data/48), (double)(100.0*bufferFullness/baseTargetFullness), (double)tempoAdjust, (double)(dynamicTargetFullness/baseTargetFullness), iters, (int)targetIPS
, AVERAGING_WINDOW, hys_min_ok_count, compensationDivider, gRequestStretcherReset
);
last=unow;
iters=0;
}
iters++;
}
pSoundTouch->setTempo(tempoAdjust);
// The reset request has now been honoured (by this function and by the
// addToAvg() call above), so clear it.
if (gRequestStretcherReset >= STRETCHER_RESET_THRESHOLD)
gRequestStretcherReset = 0;
return;
}
void SndBuffer::UpdateTempoChangeSoundTouch()
{
float statusPct = GetStatusPct();
float pctChange = statusPct - lastPct;
float tempoChange;
float emergencyAdj = 0;
float newcee = cTempo; // workspace var. for cTempo
// IMPORTANT!
// If you plan to tweak these values, make sure you're using a release build
// OUTSIDE THE DEBUGGER to test it! The Visual Studio debugger can really cause
// erratic behavior in the audio buffers, and makes the timestretcher seem a
// lot more inconsistent than it really is.
// We have two factors.
// * Distance from nominal buffer status (50% full)
// * The change from previous update to this update.
// Prediction based on the buffer change:
// (linear seems to work better here)
tempoChange = pctChange * 0.75f;
if( statusPct * tempoChange < 0.0f )
{
// only apply tempo change if it is in synch with the buffer status.
// In other words, if the buffer is high (over 0%), and is decreasing,
// ignore it. It'll just muck things up.
tempoChange = 0;
}
// Sudden spikes in framerate can cause the nominal buffer status
// to go critical, in which case we have to enact an emergency
// stretch. The following cubic formulas do that. Values near
// the extremeites give much larger results than those near 0.
// And the value is added only this time, and does not accumulate.
// (otherwise a large value like this would cause problems down the road)
// Constants:
// Weight - weights the statusPct's "emergency" consideration.
// higher values here will make the buffer perform more drastic
// compensations at the outer edges of the buffer (at -75 or +75%
// or beyond, for example).
// Range - scales the adjustment to the given range (more or less).
// The actual range is dependent on the weight used, so if you increase
// Weight you'll usually want to decrease Range somewhat to compensate.
// Prediction based on the buffer fill status:
const float statusWeight = 2.99f;
const float statusRange = 0.068f;
// "non-emergency" deadzone: In this area stretching will be strongly discouraged.
// Note: due tot he nature of timestretch latency, it's always a wee bit harder to
// cope with low fps (underruns) than it is high fps (overruns). So to help out a
// little, the low-end portions of this check are less forgiving than the high-sides.
if( cTempo < 0.965f || cTempo > 1.060f ||
pctChange < -0.38f || pctChange > 0.54f ||
statusPct < -0.42f || statusPct > 0.70f ||
eTempo < 0.89f || eTempo > 1.19f )
{
//printf("Emergency stretch: cTempo = %f eTempo = %f pctChange = %f statusPct = %f\n",cTempo,eTempo,pctChange,statusPct);
emergencyAdj = ( pow( statusPct*statusWeight, 3.0f ) * statusRange);
}
// Smooth things out by factoring our previous adjustment into this one.
// It helps make the system 'feel' a little smarter by giving it at least
// one packet worth of history to help work off of:
emergencyAdj = (emergencyAdj * 0.75f) + (lastEmergencyAdj * 0.25f );
lastEmergencyAdj = emergencyAdj;
lastPct = statusPct;
// Accumulate a fraction of the tempo change into the tempo itself.
// This helps the system run "smarter" to games that run consistently
// fast or slow by altering the base tempo to something closer to the
// game's active speed. In tests most games normalize within 2 seconds
// at 100ms latency, which is pretty good (larger buffers normalize even
// quicker).
newcee += newcee * (tempoChange+emergencyAdj) * 0.03f;
// Apply tempoChange as a scale of cTempo. That way the effect is proportional
// to the current tempo. (otherwise tempos rate of change at the extremes would
// be too drastic)
float newTempo = newcee + ( emergencyAdj * cTempo );
// ... and as a final optimization, only stretch if the new tempo is outside
// a nominal threshold. Keep this threshold check small, because it could
// cause some serious side effects otherwise. (enlarging the cTempo check above
// is usually better/safer)
if( newTempo < 0.970f || newTempo > 1.045f )
{
cTempo = (float)newcee;
if( newTempo < 0.10f ) newTempo = 0.10f;
else if( newTempo > 10.0f ) newTempo = 10.0f;
if( cTempo < 0.15f ) cTempo = 0.15f;
else if( cTempo > 7.5f ) cTempo = 7.5f;
pSoundTouch->setTempo( eTempo = (float)newTempo );
/*ConLog("* SPU2-X: [Nominal %d%%] [Emergency: %d%%] (baseTempo: %d%% ) (newTempo: %d%%) (buffer: %d%%)\n",
//(relation < 0.0) ? "Normalize" : "",
(int)(tempoChange * 100.0 * 0.03),
(int)(emergencyAdj * 100.0),
(int)(cTempo * 100.0),
(int)(newTempo * 100.0),
(int)(statusPct * 100.0)
);*/
}
else
{
// Nominal operation -- turn off stretching.
// note: eTempo 'slides' toward 1.0 for smoother audio and better
// protection against spikes.
if( cTempo != 1.0f )
{
cTempo = 1.0f;
eTempo = ( 1.0f + eTempo ) * 0.5f;
pSoundTouch->setTempo( eTempo );
}
else
{
if( eTempo != cTempo )
pSoundTouch->setTempo( eTempo=cTempo );
}
}
}
extern uint TickInterval;
void SndBuffer::UpdateTempoChangeAsyncMixing()
{
float statusPct = GetStatusPct();
lastPct = statusPct;
if( statusPct < -0.1f )
{
TickInterval -= 4;
if( statusPct < -0.3f ) TickInterval = 64;
if( TickInterval < 64 ) TickInterval = 64;
//printf("-- %d, %f\n",TickInterval,statusPct);
}
else if( statusPct > 0.2f )
{
TickInterval += 1;
if( TickInterval >= 7000 ) TickInterval = 7000;
//printf("++ %d, %f\n",TickInterval,statusPct);
}
else TickInterval = 768;
}
// Underrun handler: the timestretcher failed at its job, so slow the audio
// down. Cuts the base tempo by 12% and the effective tempo by 30% (the latter
// floored at 0.1), and counts one more step toward a stretcher reset.
// SoundTouch itself is not touched here; the new tempo takes effect the next
// time a tempo update runs.
void SndBuffer::timeStretchUnderrun()
{
	cTempo = cTempo - (cTempo * 0.12f);
	eTempo = eTempo - (eTempo * 0.30f);
	eTempo = ( eTempo < 0.1f ) ? 0.1f : eTempo;
	// pSoundTouch->setTempo( eTempo );
	//pSoundTouch->setTempoChange(-30); // temporary (until stretcher is called) slow down

	++gRequestStretcherReset;
}
// Overrun handler: an overrun means the timestretcher failed, so audio
// playback has to speed up. Raises the base tempo by 12% and the effective
// tempo by 40% (the latter capped at 7.5), and counts one more step toward a
// stretcher reset.
// Returns the number of samples to throw away: two packets worth, to give the
// timestretcher some room to work.
s32 SndBuffer::timeStretchOverrun()
{
	++gRequestStretcherReset;

	cTempo = cTempo + cTempo * 0.12f;
	eTempo = eTempo + eTempo * 0.40f;
	eTempo = ( eTempo > 7.5f ) ? 7.5f : eTempo;
	//pSoundTouch->setTempo( eTempo );
	//pSoundTouch->setTempoChange(30);// temporary (until stretcher is called) speed up

	return SndOutPacketSize*2;
}
// Converts one packet (SndOutPacketSize entries) from StereoOut32 to
// StereoOutFloat in place: the same memory is read as the source type and
// written as the destination type, element by element from the front.
// NOTE(review): writing in place presumably relies on both sample structs
// having the same size -- confirm against their definitions.
static void CvtPacketToFloat( StereoOut32* srcdest )
{
	const StereoOut32* in  = (StereoOut32*)srcdest;
	StereoOutFloat*    out = (StereoOutFloat*)srcdest;

	for( uint n=0; n<SndOutPacketSize; ++n )
		out[n] = (StereoOutFloat)in[n];
}
// Converts 'size' entries from StereoOutFloat back to StereoOut32 in place,
// element by element from the front.
// Parameter note: Size should always be a multiple of 128, thanks!
// (The matching assertion below is currently disabled.)
static void CvtPacketToInt( StereoOut32* srcdest, uint size )
{
	//pxAssume( (size & 127) == 0 );
	const StereoOutFloat* in  = (StereoOutFloat*)srcdest;
	StereoOut32*          out = srcdest;

	for( uint n=0; n<size; ++n )
		out[n] = (StereoOut32)in[n];
}
// Pushes the packet held in sndTempBuffer through SoundTouch, writes whatever
// stretched audio comes back to the output buffer, then runs the tempo
// controller selected at compile time.
void SndBuffer::timeStretchWrite()
{
	// Data prediction keeps the tempo adjustments more accurate. The
	// timestretcher hands packets back in belated "clumps": most of the time
	// nothing comes out, then several chunks arrive at once. Recording the
	// expected output up front makes the controller more responsive.
	PredictDataWrite( (int)( SndOutPacketSize / eTempo ) );

	CvtPacketToFloat( sndTempBuffer );
	pSoundTouch->putSamples( (float*)sndTempBuffer, SndOutPacketSize );

	// Drain everything SoundTouch has ready, at most one packet per pass.
	for( ;; )
	{
		const int received = pSoundTouch->receiveSamples( (float*)sndTempBuffer, SndOutPacketSize );
		if( received == 0 )
			break;

		// Hint: It's assumed that pSoundTouch will return chunks of 128 bytes (it always does as
		// long as the SSE optimizations are enabled), which means we can do our own SSE opts here.
		CvtPacketToInt( sndTempBuffer, received );
		_WriteSamples( sndTempBuffer, received );
	}

#ifdef SPU2X_USE_OLD_STRETCHER
	UpdateTempoChangeSoundTouch();
#else
	UpdateTempoChangeSoundTouch2();
#endif
}
void SndBuffer::soundtouchInit()
{
pSoundTouch = new soundtouch::SoundTouch();
pSoundTouch->setSampleRate(SampleRate);
pSoundTouch->setChannels(2);
pSoundTouch->setSetting( SETTING_USE_QUICKSEEK, 0 );
pSoundTouch->setSetting( SETTING_USE_AA_FILTER, 0 );
SoundtouchCfg::ApplySettings( *pSoundTouch );
pSoundTouch->setTempo(1);
// some timestretch management vars:
cTempo = 1.0;
eTempo = 1.0;
lastPct = 0;
lastEmergencyAdj = 0;
m_predictData = 0;
}
// Empties the SoundTouch instance, returns it to tempo 1 and resets the
// timestretch management variables. Does nothing when no instance exists.
void SndBuffer::soundtouchClearContents()
{
	if( pSoundTouch != NULL )
	{
		pSoundTouch->clear();
		pSoundTouch->setTempo(1);

		cTempo = eTempo = 1.0f;
		lastPct = lastEmergencyAdj = 0;
		m_predictData = 0;
	}
}
// Releases the SoundTouch instance created by soundtouchInit().
// NOTE(review): safe_delete is a project helper not visible here; presumably it
// deletes the object and resets the pointer to NULL, which is what the NULL
// check in soundtouchClearContents() would rely on -- confirm.
void SndBuffer::soundtouchCleanup()
{
safe_delete( pSoundTouch );
}