Merge pull request #42 from degasus/latencyFix

audio latency fix
This commit is contained in:
Pierre Bourdon 2014-02-15 18:22:57 +01:00
commit cbe7656b2f
3 changed files with 58 additions and 115 deletions

View File

@ -7,6 +7,8 @@
#include "AudioCommon.h" #include "AudioCommon.h"
#include "CPUDetect.h" #include "CPUDetect.h"
#include "../Core/Host.h" #include "../Core/Host.h"
#include "ConfigManager.h"
#include "HW/VideoInterface.h"
#include "../Core/HW/AudioInterface.h" #include "../Core/HW/AudioInterface.h"
@ -18,7 +20,7 @@
#endif #endif
// Executed from sound stream thread // Executed from sound stream thread
unsigned int CMixer::Mix(short* samples, unsigned int numSamples) unsigned int CMixer::Mix(short* samples, unsigned int numSamples, bool consider_framelimit)
{ {
if (!samples) if (!samples)
return 0; return 0;
@ -32,16 +34,7 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples)
return numSamples; return numSamples;
} }
unsigned int numLeft = GetNumSamples(); unsigned int currentSample = 0;
if (m_AIplaying) {
if (numLeft < numSamples)//cannot do much about this
m_AIplaying = false;
if (numLeft < MAX_SAMPLES/4)//low watermark
m_AIplaying = false;
} else {
if (numLeft > MAX_SAMPLES/2)//high watermark
m_AIplaying = true;
}
// Cache access in non-volatile variable // Cache access in non-volatile variable
// This is the only function changing the read value, so it's safe to // This is the only function changing the read value, so it's safe to
@ -53,100 +46,68 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples)
u32 indexR = Common::AtomicLoad(m_indexR); u32 indexR = Common::AtomicLoad(m_indexR);
u32 indexW = Common::AtomicLoad(m_indexW); u32 indexW = Common::AtomicLoad(m_indexW);
if (m_AIplaying) { float numLeft = ((indexW - indexR) & INDEX_MASK) / 2;
numLeft = (numLeft > numSamples) ? numSamples : numLeft; m_numLeftI = (numLeft + m_numLeftI*(CONTROL_AVG-1)) / CONTROL_AVG;
float offset = (m_numLeftI - LOW_WATERMARK) * CONTROL_FACTOR;
if(offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT;
if(offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT;
if (AudioInterface::GetAIDSampleRate() == m_sampleRate) // (1:1)
{
#if _M_SSE >= 0x301
if (cpu_info.bSSSE3 && !((numLeft * 2) % 8))
{
static const __m128i sr_mask =
_mm_set_epi32(0x0C0D0E0FL, 0x08090A0BL,
0x04050607L, 0x00010203L);
for (unsigned int i = 0; i < numLeft * 2; i += 8)
{
_mm_storeu_si128((__m128i *)&samples[i], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&m_buffer[(indexR + i) & INDEX_MASK]), sr_mask));
}
}
else
#endif
{
for (unsigned int i = 0; i < numLeft * 2; i+=2)
{
samples[i] = Common::swap16(m_buffer[(indexR + i + 1) & INDEX_MASK]);
samples[i+1] = Common::swap16(m_buffer[(indexR + i) & INDEX_MASK]);
}
}
indexR += numLeft * 2;
}
else //linear interpolation
{
//render numleft sample pairs to samples[] //render numleft sample pairs to samples[]
//advance indexR with sample position //advance indexR with sample position
//remember fractional offset //remember fractional offset
static u32 frac = 0; u32 framelimit = SConfig::GetInstance().m_Framelimit;
const u32 ratio = (u32)( 65536.0f * (float)AudioInterface::GetAIDSampleRate() / (float)m_sampleRate ); float aid_sample_rate = AudioInterface::GetAIDSampleRate() + offset;
if (consider_framelimit && framelimit > 2)
{
aid_sample_rate = aid_sample_rate * (framelimit - 1) * 5 / VideoInterface::TargetRefreshRate;
}
for (u32 i = 0; i < numLeft * 2; i+=2) { static u32 frac = 0;
const u32 ratio = (u32)( 65536.0f * aid_sample_rate / (float)m_sampleRate );
if(ratio > 0x10000)
ERROR_LOG(AUDIO, "ratio out of range");
for (; currentSample < numSamples*2 && ((indexW-indexR) & INDEX_MASK) > 2; currentSample+=2) {
u32 indexR2 = indexR + 2; //next sample u32 indexR2 = indexR + 2; //next sample
if ((indexR2 & INDEX_MASK) == (indexW & INDEX_MASK)) //..if it exists
indexR2 = indexR;
s16 l1 = Common::swap16(m_buffer[indexR & INDEX_MASK]); //current s16 l1 = Common::swap16(m_buffer[indexR & INDEX_MASK]); //current
s16 l2 = Common::swap16(m_buffer[indexR2 & INDEX_MASK]); //next s16 l2 = Common::swap16(m_buffer[indexR2 & INDEX_MASK]); //next
int sampleL = ((l1 << 16) + (l2 - l1) * (u16)frac) >> 16; int sampleL = ((l1 << 16) + (l2 - l1) * (u16)frac) >> 16;
samples[i+1] = sampleL; samples[currentSample+1] = sampleL;
s16 r1 = Common::swap16(m_buffer[(indexR + 1) & INDEX_MASK]); //current s16 r1 = Common::swap16(m_buffer[(indexR + 1) & INDEX_MASK]); //current
s16 r2 = Common::swap16(m_buffer[(indexR2 + 1) & INDEX_MASK]); //next s16 r2 = Common::swap16(m_buffer[(indexR2 + 1) & INDEX_MASK]); //next
int sampleR = ((r1 << 16) + (r2 - r1) * (u16)frac) >> 16; int sampleR = ((r1 << 16) + (r2 - r1) * (u16)frac) >> 16;
samples[i] = sampleR; samples[currentSample] = sampleR;
frac += ratio; frac += ratio;
indexR += 2 * (u16)(frac >> 16); indexR += 2 * (u16)(frac >> 16);
frac &= 0xffff; frac &= 0xffff;
} }
}
} else {
numLeft = 0;
}
// Padding // Padding
if (numSamples > numLeft)
{
unsigned short s[2]; unsigned short s[2];
s[0] = Common::swap16(m_buffer[(indexR - 1) & INDEX_MASK]); s[0] = Common::swap16(m_buffer[(indexR - 1) & INDEX_MASK]);
s[1] = Common::swap16(m_buffer[(indexR - 2) & INDEX_MASK]); s[1] = Common::swap16(m_buffer[(indexR - 2) & INDEX_MASK]);
for (unsigned int i = numLeft*2; i < numSamples*2; i+=2) for (; currentSample < numSamples*2; currentSample+=2)
*(u32*)(samples+i) = *(u32*)(s); {
// memset(&samples[numLeft * 2], 0, (numSamples - numLeft) * 4); samples[currentSample] = s[0];
samples[currentSample+1] = s[1];
} }
// Flush cached variable // Flush cached variable
Common::AtomicStore(m_indexR, indexR); Common::AtomicStore(m_indexR, indexR);
//when logging, also throttle HLE audio
if (m_logAudio) {
if (m_AIplaying) {
Premix(samples, numLeft);
AudioInterface::Callback_GetStreaming(samples, numLeft, m_sampleRate);
g_wave_writer.AddStereoSamples(samples, numLeft);
}
}
else { //or mix as usual
// Add the DSPHLE sound, re-sampling is done inside // Add the DSPHLE sound, re-sampling is done inside
Premix(samples, numSamples); Premix(samples, numSamples);
// Add the DTK Music // Add the DTK Music
// Re-sampling is done inside // Re-sampling is done inside
AudioInterface::Callback_GetStreaming(samples, numSamples, m_sampleRate); AudioInterface::Callback_GetStreaming(samples, numSamples, m_sampleRate);
} if (m_logAudio)
g_wave_writer.AddStereoSamples(samples, numSamples);
return numSamples; return numSamples;
} }
@ -198,24 +159,3 @@ void CMixer::PushSamples(const short *samples, unsigned int num_samples)
return; return;
} }
unsigned int CMixer::GetNumSamples()
{
// Guess how many samples would be available after interpolation.
// As interpolation needs at least one sample from the future to
// linearly interpolate between them, one sample less is available.
// We also can't know the current interpolation state (especially
// the frac), so to be safe, subtract one more in order not
// to underflow the fifo.
u32 numSamples = ((Common::AtomicLoad(m_indexW) - Common::AtomicLoad(m_indexR)) & INDEX_MASK) / 2;
if (AudioInterface::GetAIDSampleRate() == m_sampleRate)
; //numSamples = numSamples; // 1:1
else if (m_sampleRate == 48000 && AudioInterface::GetAIDSampleRate() == 32000)
numSamples = numSamples * 3 / 2 - 2; // most common case
else
numSamples = numSamples * m_sampleRate / AudioInterface::GetAIDSampleRate() - 2;
return numSamples;
}

View File

@ -8,9 +8,13 @@
#include "StdMutex.h" #include "StdMutex.h"
// 16 bit Stereo // 16 bit Stereo
#define MAX_SAMPLES (1024 * 8) #define MAX_SAMPLES (1024 * 2) // 64ms
#define INDEX_MASK (MAX_SAMPLES * 2 - 1) #define INDEX_MASK (MAX_SAMPLES * 2 - 1)
#define RESERVED_SAMPLES (256)
#define LOW_WATERMARK 1280 // 40 ms
#define MAX_FREQ_SHIFT 200 // per 32000 Hz
#define CONTROL_FACTOR 0.2 // in freq_shift per fifo size offset
#define CONTROL_AVG 32
class CMixer { class CMixer {
@ -24,7 +28,7 @@ public:
, m_logAudio(0) , m_logAudio(0)
, m_indexW(0) , m_indexW(0)
, m_indexR(0) , m_indexR(0)
, m_AIplaying(true) , m_numLeftI(0.0f)
{ {
// AyuanX: The internal (Core & DSP) sample rate is fixed at 32KHz // AyuanX: The internal (Core & DSP) sample rate is fixed at 32KHz
// So when AI/DAC sample rate differs than 32KHz, we have to do re-sampling // So when AI/DAC sample rate differs than 32KHz, we have to do re-sampling
@ -38,9 +42,8 @@ public:
virtual ~CMixer() {} virtual ~CMixer() {}
// Called from audio threads // Called from audio threads
virtual unsigned int Mix(short* samples, unsigned int numSamples); virtual unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit = true);
virtual void Premix(short * /*samples*/, unsigned int /*numSamples*/) {} virtual void Premix(short * /*samples*/, unsigned int /*numSamples*/) {}
unsigned int GetNumSamples();
// Called from main thread // Called from main thread
virtual void PushSamples(const short* samples, unsigned int num_samples); virtual void PushSamples(const short* samples, unsigned int num_samples);
@ -98,8 +101,8 @@ protected:
volatile u32 m_indexW; volatile u32 m_indexW;
volatile u32 m_indexR; volatile u32 m_indexR;
bool m_AIplaying;
std::mutex m_csMixing; std::mutex m_csMixing;
float m_numLeftI;
volatile float m_speed; // Current rate of the emulation (1.0 = 100% speed) volatile float m_speed; // Current rate of the emulation (1.0 = 100% speed)
private: private:

View File

@ -192,7 +192,7 @@ void OpenALStream::SoundLoop()
unsigned int minSamples = surround_capable ? 240 : 0; // DPL2 accepts 240 samples minimum (FWRDURATION) unsigned int minSamples = surround_capable ? 240 : 0; // DPL2 accepts 240 samples minimum (FWRDURATION)
numSamples = (numSamples > OAL_MAX_SAMPLES) ? OAL_MAX_SAMPLES : numSamples; numSamples = (numSamples > OAL_MAX_SAMPLES) ? OAL_MAX_SAMPLES : numSamples;
numSamples = m_mixer->Mix(realtimeBuffer, numSamples); numSamples = m_mixer->Mix(realtimeBuffer, numSamples, false);
// Convert the samples from short to float // Convert the samples from short to float
float dest[OAL_MAX_SAMPLES * STEREO_CHANNELS]; float dest[OAL_MAX_SAMPLES * STEREO_CHANNELS];