pcsx2/plugins/spu2-x/src/SndOut.cpp

723 lines
20 KiB
C++
Raw Normal View History

/* SPU2-X, A plugin for Emulating the Sound Processing Unit of the Playstation 2
* Developed and maintained by the Pcsx2 Development Team.
*
* Original portions from SPU2ghz are (c) 2008 by David Quintana [gigaherz]
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the the License, or (at your
* option) any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with this library; if not, write to the Free Software Foundation, Inc., 59
* Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
// [TODO] : The layout of this code file is now a complete hackish mess after
// numerous timestretch-related additions. The whole thing should really be
// rethought and redone at this point.
#include "spu2.h"
#include "SoundTouch/SoundTouch.h"
#include "SoundTouch/WavFile.h"
#include <new>
static int ts_stats_stretchblocks = 0;
static int ts_stats_normalblocks = 0;
static int ts_stats_logcounter = 0;
class NullOutModule: public SndOutModule
{
public:
s32 Init(SndBuffer *) { return 0; }
void Close() { }
s32 Test() const { return 0; }
void Configure(HWND parent) { }
bool Is51Out() const { return false; }
int GetEmptySampleCount() const { return 0; }
const wchar_t* GetIdent() const
{
return _T("nullout");
}
const wchar_t* GetLongName() const
{
return _T("No Sound (Emulate SPU2 only)");
}
} NullOut;
SndOutModule* mods[]=
{
&NullOut,
XAudio2Out,
DSoundOut,
WaveOut,
//ASIOOut,
NULL // signals the end of our list
};
int FindOutputModuleById( const wchar_t* omodid )
{
int modcnt = 0;
while( mods[modcnt] != NULL )
{
if( wcscmp( mods[modcnt]->GetIdent(), omodid ) == 0 )
break;
++modcnt;
}
return modcnt;
}
__forceinline s16 SndScaleVol( s32 inval )
{
return inval >> SndOutVolumeShift;
}
// records last buffer status (fill %, range -100 to 100, with 0 being 50% full)
float lastPct;
float lastEmergencyAdj;
float cTempo=1;
float eTempo = 1;
int freezeTempo = 0;
soundtouch::SoundTouch* pSoundTouch=NULL;
//usefull when timestretch isn't available
class SndBufferImpl: public SndBuffer
{
private:
s32 *buffer;
s32 size;
s32 rpos;
s32 wpos;
s32 data;
// data prediction amount, used to "commit" data that hasn't
// finished timestretch processing.
s32 predictData;
bool pw;
bool underrun_freeze;
protected:
int GetAlignedBufferSize( int comp )
{
return (comp + SndOutPacketSize-1) & ~(SndOutPacketSize-1);
}
public:
SndBufferImpl( float latencyMS )
{
rpos=0;
wpos=0;
data=0;
size=GetAlignedBufferSize( (int)(latencyMS * SampleRate / 500.0f ) );
buffer = new s32[size];
pw=false;
underrun_freeze = false;
predictData = 0;
}
virtual ~SndBufferImpl()
{
delete buffer;
}
virtual void WriteSamples(s32 *bData, int nSamples)
{
int free = size-data;
predictData = 0;
jASSUME( data <= size );
// Problem:
// If the SPU2 gets out of sync with the SndOut device, the writepos of the
// circular buffer will overtake the readpos, leading to a prolonged period
// of hopscotching read/write accesses (ie, lots of staticy crap sound for
// several seconds).
//
// Compromise:
// When an overrun occurs, we adapt by discarding a portion of the buffer.
// The older portion of the buffer is discarded rather than incoming data,
// so that the overall audio synchronization is better.
if( free < nSamples )
{
// Buffer overrun!
// Dump samples from the read portion of the buffer instead of dropping
// the newly written stuff.
s32 comp;
if( !timeStretchDisabled )
{
// If we overran it means the timestretcher failed. We need to speed
// up audio playback.
cTempo += cTempo * 0.12f;
eTempo += eTempo * 0.40f;
if( eTempo > 7.5f ) eTempo = 7.5f;
pSoundTouch->setTempo( eTempo );
// Throw out just a little bit (two packets worth) to help
// give the TS some room to work:
comp = SndOutPacketSize*2;
}
else
{
// Toss half the buffer plus whatever's being written anew:
comp = GetAlignedBufferSize( (size + nSamples ) / 2 );
if( comp > (size-SndOutPacketSize) ) comp = size-SndOutPacketSize;
}
data -= comp;
rpos = (rpos+comp)%size;
if( MsgOverruns() )
ConLog(" * SPU2 > Overrun Compensation (%d packets tossed)\n", comp / SndOutPacketSize );
lastPct = 0.0; // normalize the timestretcher
}
// copy in two phases, since there's a chance the packet
// wraps around the buffer (it'd be nice to deal in packets only, but
// the timestretcher and DSP options require flexibility).
const int endPos = wpos + nSamples;
const int secondCopyLen = endPos - size;
s32* wposbuffer = &buffer[wpos];
data += nSamples;
if( secondCopyLen > 0 )
{
nSamples -= secondCopyLen;
memcpy( buffer, &bData[nSamples], secondCopyLen * sizeof( *bData ) );
wpos = secondCopyLen;
}
else
wpos += nSamples;
memcpy( wposbuffer, bData, nSamples * sizeof( *bData ) );
}
protected:
// Returns TRUE if there is data to be output, or false if no data
// is available to be copied.
bool CheckUnderrunStatus( int& nSamples, int& quietSampleCount )
{
quietSampleCount = 0;
if( underrun_freeze )
{
int toFill = (int)(size * ( timeStretchDisabled ? 0.50f : 0.1f ) );
toFill = GetAlignedBufferSize( toFill );
// toFill is now aligned to a SndOutPacket
if( data < toFill )
{
quietSampleCount = nSamples;
return false;
}
underrun_freeze = false;
if( MsgOverruns() )
ConLog(" * SPU2 > Underrun compensation (%d packets buffered)\n", toFill / SndOutPacketSize );
lastPct = 0.0; // normalize timestretcher
}
else if( data < nSamples )
{
nSamples = data;
quietSampleCount = SndOutPacketSize - data;
underrun_freeze = true;
if( !timeStretchDisabled )
{
// timeStretcher failed it's job. We need to slow down the audio some.
cTempo -= (cTempo * 0.12f);
eTempo -= (eTempo * 0.30f);
if( eTempo < 0.1f ) eTempo = 0.1f;
pSoundTouch->setTempo( eTempo );
}
return nSamples != 0;
}
return true;
}
public:
void ReadSamples( s16* bData )
{
int nSamples = SndOutPacketSize;
// Problem:
// If the SPU2 gets even the least bit out of sync with the SndOut device,
// the readpos of the circular buffer will overtake the writepos,
// leading to a prolonged period of hopscotching read/write accesses (ie,
// lots of staticy crap sound for several seconds).
//
// Fix:
// If the read position overtakes the write position, abort the
// transfer immediately and force the SndOut driver to wait until
// the read buffer has filled up again before proceeding.
// This will cause one brief hiccup that can never exceed the user's
// set buffer length in duration.
int quietSamples;
if( CheckUnderrunStatus( nSamples, quietSamples ) )
{
jASSUME( nSamples <= SndOutPacketSize );
// [Air] [TODO]: This loop is probably a candidiate for SSE2 optimization.
const int endPos = rpos + nSamples;
const int secondCopyLen = endPos - size;
const s32* rposbuffer = &buffer[rpos];
data -= nSamples;
if( secondCopyLen > 0 )
{
nSamples -= secondCopyLen;
for( int i=0; i<secondCopyLen; i++ )
bData[nSamples+i] = SndScaleVol( buffer[i] );
rpos = secondCopyLen;
}
else
rpos += nSamples;
for( int i=0; i<nSamples; i++ )
bData[i] = SndScaleVol( rposbuffer[i] );
}
// If quietSamples != 0 it means we have an underrun...
// Let's just dull out some silence, because that's usually the least
// painful way of dealing with underruns:
memset( bData, 0, quietSamples * sizeof(*bData) );
}
void ReadSamples( s32* bData )
{
int nSamples = SndOutPacketSize;
// Problem:
// If the SPU2 gets even the least bit out of sync with the SndOut device,
// the readpos of the circular buffer will overtake the writepos,
// leading to a prolonged period of hopscotching read/write accesses (ie,
// lots of staticy crap sound for several seconds).
//
// Fix:
// If the read position overtakes the write position, abort the
// transfer immediately and force the SndOut driver to wait until
// the read buffer has filled up again before proceeding.
// This will cause one brief hiccup that can never exceed the user's
// set buffer length in duration.
int quietSamples;
if( CheckUnderrunStatus( nSamples, quietSamples ) )
{
// nSamples is garaunteed non-zero if CheckUnderrunStatus
// returned true.
const int endPos = rpos + nSamples;
const int secondCopyLen = endPos - size;
const int oldrpos = rpos;
data -= nSamples;
if( secondCopyLen > 0 )
{
nSamples -= secondCopyLen;
memcpy( &bData[nSamples], buffer, secondCopyLen * sizeof( *bData ) );
rpos = secondCopyLen;
}
else
rpos += nSamples;
memcpy( bData, &buffer[oldrpos], nSamples * sizeof( *bData ) );
}
// If quietSamples != 0 it means we have an underrun...
// Let's just dull out some silence, because that's usually the least
// painful way of dealing with underruns:
memset( bData, 0, quietSamples * sizeof(*bData) );
}
void PredictDataWrite( int samples )
{
predictData += samples;
}
virtual void PauseOnWrite(bool doPause) { pw = doPause; }
// Calculate the buffer status percentage.
// Returns range from -1.0 to 1.0
// 1.0 = buffer overflow!
// 0.0 = buffer nominal (50% full)
// -1.0 = buffer underflow!
float GetStatusPct()
{
// Get the buffer status of the output driver too, so that we can
// obtain a more accurate overall buffer status.
int drvempty = mods[OutputModule]->GetEmptySampleCount(); // / 2;
//ConLog( "Data %d >>> driver: %d predict: %d\n", data, drvempty, predictData );
float result = (float)(data + predictData - drvempty) - (size/2);
result /= (size/2);
return result;
}
};
SndBufferImpl *sndBuffer=NULL;
s32* sndTempBuffer=NULL;
s32 sndTempProgress=NULL;
s16* sndTempBuffer16=NULL;
void UpdateTempoChange()
{
if( --freezeTempo > 0 )
{
return;
}
float statusPct = sndBuffer->GetStatusPct();
float pctChange = statusPct - lastPct;
float tempoChange;
float emergencyAdj = 0;
float newcee = cTempo; // workspace var. for cTempo
// IMPORTANT!
// If you plan to tweak these values, make sure you're using a release build
// OUTSIDE THE DEBUGGER to test it! The Visual Studio debugger can really cause
// erratic behavior in the audio buffers, and makes the timestretcher seem a
// lot more inconsistent than it really is.
// We have two factors.
// * Distance from nominal buffer status (50% full)
// * The change from previous update to this update.
// Prediction based on the buffer change:
// (linear seems to work better here)
tempoChange = pctChange * 0.75f;
if( statusPct * tempoChange < 0.0f )
{
// only apply tempo change if it is in synch with the buffer status.
// In other words, if the buffer is high (over 0%), and is decreasing,
// ignore it. It'll just muck things up.
tempoChange = 0;
}
// Sudden spikes in framerate can cause the nominal buffer status
// to go critical, in which case we have to enact an emergency
// stretch. The following cubic formulas do that. Values near
// the extremeites give much larger results than those near 0.
// And the value is added only this time, and does not accumulate.
// (otherwise a large value like this would cause problems down the road)
// Constants:
// Weight - weights the statusPct's "emergency" consideration.
// higher values here will make the buffer perform more drastic
// compensations at the outer edges of the buffer (at -75 or +75%
// or beyond, for example).
// Range - scales the adjustment to the given range (more or less).
// The actual range is dependent on the weight used, so if you increase
// Weight you'll usually want to decrease Range somewhat to compensate.
// Prediction based on the buffer fill status:
const float statusWeight = 2.99f;
const float statusRange = 0.068f;
// "non-emergency" deadzone: In this area stretching will be strongly discouraged.
// Note: due tot he nature of timestretch latency, it's always a wee bit harder to
// cope with low fps (underruns) tha it is high fps (overruns). So to help out a
// little, the low-end portions of this check are less forgiving than the high-sides.
if( cTempo < 0.965f || cTempo > 1.060f ||
pctChange < -0.38f || pctChange > 0.54f ||
statusPct < -0.32f || statusPct > 0.39f ||
eTempo < 0.89f || eTempo > 1.19f )
{
emergencyAdj = ( pow( statusPct*statusWeight, 3.0f ) * statusRange);
}
// Smooth things out by factoring our previous adjustment into this one.
// It helps make the system 'feel' a little smarter by giving it at least
// one packet worth of history to help work off of:
emergencyAdj = (emergencyAdj * 0.75f) + (lastEmergencyAdj * 0.25f );
lastEmergencyAdj = emergencyAdj;
lastPct = statusPct;
// Accumulate a fraction of the tempo change into the tempo itself.
// This helps the system run "smarter" to games that run consistently
// fast or slow by altering the base tempo to something closer to the
// game's active speed. In tests most games normalize within 2 seconds
// at 100ms latency, which is pretty good (larger buffers normalize even
// quicker).
newcee += newcee * (tempoChange+emergencyAdj) * 0.03f;
// Apply tempoChange as a scale of cTempo. That way the effect is proportional
// to the current tempo. (otherwise tempos rate of change at the extremes would
// be too drastic)
float newTempo = newcee + ( emergencyAdj * cTempo );
// ... and as a final optimization, only stretch if the new tempo is outside
// a nominal threshold. Keep this threshold check small, because it could
// cause some serious side effects otherwise. (enlarging the cTempo check above
// is usually better/safer)
if( newTempo < 0.970f || newTempo > 1.045f )
{
cTempo = (float)newcee;
if( newTempo < 0.10f ) newTempo = 0.10f;
else if( newTempo > 10.0f ) newTempo = 10.0f;
if( cTempo < 0.15f ) cTempo = 0.15f;
else if( cTempo > 7.5f ) cTempo = 7.5f;
pSoundTouch->setTempo( eTempo = (float)newTempo );
ts_stats_stretchblocks++;
/*ConLog(" * SPU2: [Nominal %d%%] [Emergency: %d%%] (baseTempo: %d%% ) (newTempo: %d%%) (buffer: %d%%)\n",
//(relation < 0.0) ? "Normalize" : "",
(int)(tempoChange * 100.0 * 0.03),
(int)(emergencyAdj * 100.0),
(int)(cTempo * 100.0),
(int)(newTempo * 100.0),
(int)(statusPct * 100.0)
);*/
}
else
{
// Nominal operation -- turn off stretching.
// note: eTempo 'slides' toward 1.0 for smoother audio and better
// protection against spikes.
if( cTempo != 1.0f )
{
cTempo = 1.0f;
eTempo = ( 1.0f + eTempo ) * 0.5f;
pSoundTouch->setTempo( eTempo );
}
else
{
if( eTempo != cTempo )
pSoundTouch->setTempo( eTempo=cTempo );
ts_stats_normalblocks++;
}
}
}
void soundtouchInit()
{
pSoundTouch = new soundtouch::SoundTouch();
pSoundTouch->setSampleRate(SampleRate);
pSoundTouch->setChannels(2);
pSoundTouch->setSetting( SETTING_USE_QUICKSEEK, 0 );
pSoundTouch->setSetting( SETTING_USE_AA_FILTER, 0 );
pSoundTouch->setSetting( SETTING_SEQUENCE_MS, SoundtouchCfg::SequenceLenMS );
pSoundTouch->setSetting( SETTING_SEEKWINDOW_MS, SoundtouchCfg::SeekWindowMS );
pSoundTouch->setSetting( SETTING_OVERLAP_MS, SoundtouchCfg::OverlapMS );
pSoundTouch->setTempo(1);
// some timestretch management vars:
cTempo = 1.0;
eTempo = 1.0;
lastPct = 0;
lastEmergencyAdj = 0;
// just freeze tempo changes for a while at startup.
// the driver buffers are bogus anyway.
freezeTempo = 8;
}
static void _sndInitFail()
{
// If a failure occurs, just initialize the NoSound driver. This'll allow
// the game to emulate properly (hopefully), albeit without sound.
OutputModule = FindOutputModuleById( NullOut.GetIdent() );
mods[OutputModule]->Init( sndBuffer );
}
s32 SndInit()
{
if( mods[OutputModule] == NULL )
{
_sndInitFail();
return 0;
}
// initialize sound buffer
// Buffer actually attempts to run ~50%, so allocate near double what
// the requested latency is:
try
{
sndBuffer = new SndBufferImpl( SndOutLatencyMS * (timeStretchDisabled ? 1.5f : 2.0f ) );
sndTempBuffer = new s32[SndOutPacketSize];
sndTempBuffer16 = new s16[SndOutPacketSize];
}
catch( std::bad_alloc& )
{
// out of memory exception (most likely)
SysMessage( "Out of memory error occured while initializing SPU2." );
_sndInitFail();
return 0;
}
// clear buffers!
// Fixes loopy sounds on emu resets.
memset( sndTempBuffer, 0, sizeof(s32) * SndOutPacketSize );
memset( sndTempBuffer16, 0, sizeof(s16) * SndOutPacketSize );
sndTempProgress = 0;
soundtouchInit(); // initializes the timestretching
// some crap
spdif_set51(mods[OutputModule]->Is51Out());
// initialize module
if( mods[OutputModule]->Init(sndBuffer) == -1 )
{
_sndInitFail();
}
return 0;
}
void SndClose()
{
mods[OutputModule]->Close();
SAFE_DELETE_OBJ( sndBuffer );
SAFE_DELETE_ARRAY( sndTempBuffer );
SAFE_DELETE_ARRAY( sndTempBuffer16 );
SAFE_DELETE_OBJ( pSoundTouch );
}
s32 SndWrite(s32 ValL, s32 ValR)
{
// Log final output to wavefile.
WaveDump::WriteCore( 1, CoreSrc_External, SndScaleVol(ValL), SndScaleVol(ValR) );
RecordWrite(SndScaleVol(ValL),SndScaleVol(ValR));
if(mods[OutputModule] == &NullOut) // null output doesn't need buffering or stretching! :p
return 0;
sndTempBuffer[sndTempProgress++] = ValL;
sndTempBuffer[sndTempProgress++] = ValR;
// If we haven't accumulated a full packet yet, do nothing more:
if(sndTempProgress < SndOutPacketSize) return 1;
if(dspPluginEnabled)
{
for(int i=0;i<SndOutPacketSize;i++) { sndTempBuffer16[i] = SndScaleVol( sndTempBuffer[i] ); }
// send to winamp DSP
sndTempProgress = DspProcess(sndTempBuffer16,sndTempProgress>>1)<<1;
for(int i=0;i<sndTempProgress;i++) { sndTempBuffer[i] = sndTempBuffer16[i]<<SndOutVolumeShift; }
}
static int equalized = 0;
if( !timeStretchDisabled )
{
bool progress = false;
// data prediction helps keep the tempo adjustments more accurate.
// The timestretcher returns packets in belated "clump" form.
// Meaning that most of the time we'll get nothing back, and then
// suddenly we'll get several chunks back at once. Thus we use
// data prediction to make the timestretcher more responsive.
sndBuffer->PredictDataWrite( (int)( sndTempProgress / eTempo ) );
for(int i=0;i<sndTempProgress;i++) { ((float*)sndTempBuffer)[i] = sndTempBuffer[i]/2147483648.0f; }
pSoundTouch->putSamples((float*)sndTempBuffer, sndTempProgress>>1);
while( ( sndTempProgress = pSoundTouch->receiveSamples((float*)sndTempBuffer, sndTempProgress>>1)<<1 ) != 0 )
{
// [Air] [TODO] : Implement an SSE downsampler to int.
for(int i=0;i<sndTempProgress;i++)
{
sndTempBuffer[i] = (s32)(((float*)sndTempBuffer)[i]*2147483648.0f);
}
sndBuffer->WriteSamples(sndTempBuffer, sndTempProgress);
progress = true;
}
UpdateTempoChange();
if( MsgOverruns() )
{
if( progress )
{
if( ++ts_stats_logcounter > 300 )
{
ts_stats_logcounter = 0;
ConLog( " * SPU2 > Timestretch Stats > %d%% of packets stretched.\n",
( ts_stats_stretchblocks * 100 ) / ( ts_stats_normalblocks + ts_stats_stretchblocks ) );
ts_stats_normalblocks = 0;
ts_stats_stretchblocks = 0;
}
}
}
}
else
{
sndBuffer->WriteSamples(sndTempBuffer, sndTempProgress);
sndTempProgress=0;
}
return 1;
}
s32 SndTest()
{
if( mods[OutputModule] == NULL )
return -1;
return mods[OutputModule]->Test();
}
void SndConfigure(HWND parent, u32 module )
{
if( mods[module] == NULL )
return;
mods[module]->Configure(parent);
}