Remove High Frequency Distortion and Prevent Crackling in Almost All Cases

2024-06-23 15:10:06 -04:00 · 2024-06-23 15:10:06 -04:00 · 78f0083032
parent 2aec195ec8
commit 78f0083032
14 changed files with 560 additions and 313 deletions
--- a/Source/Core/AudioCommon/AudioStretcher.cpp
+++ b/Source/Core/AudioCommon/AudioStretcher.cpp
@ -12,7 +12,7 @@

 namespace AudioCommon
 {
-AudioStretcher::AudioStretcher(unsigned int sample_rate) : m_sample_rate(sample_rate)
+AudioStretcher::AudioStretcher(u64 sample_rate) : m_sample_rate(sample_rate)
 {
  m_sound_touch.setChannels(2);
  m_sound_touch.setSampleRate(sample_rate);
@ -25,46 +25,45 @@ void AudioStretcher::Clear()
  m_sound_touch.clear();
 }

-void AudioStretcher::ProcessSamples(const short* in, unsigned int num_in, unsigned int num_out)
+// Queues num_in input samples in SoundTouch and retunes the tempo so that the SoundTouch
+// backlog drifts toward half of the configured maximum.
+//   in      - samples forwarded to SoundTouch::putSamples.
+//   num_in  - number of input samples offered this call; dropped when the backlog is too large.
+//   num_out - number of output samples the caller is about to request.
+void AudioStretcher::ProcessSamples(const s16* in, u32 num_in, u32 num_out)
 {
+  // A zero-sized request carries no timing information, and the ratios below would divide by
+  // zero: with num_in == 0 as well, 0/0 yields NaN, std::clamp passes it through, and it would
+  // then stick in m_stretch_ratio for good. Queue whatever was handed in and leave the tempo be.
+  if (num_out == 0)
+  {
+    m_sound_touch.putSamples(in, num_in);
+    return;
+  }
+
-  const double time_delta = static_cast<double>(num_out) / m_sample_rate;  // seconds
+  const double time_delta = static_cast<double>(num_out) / m_sample_rate;
+  double current_ratio = static_cast<double>(num_in) / num_out;  // Current sample ratio.

-  // We were given actual_samples number of samples, and num_samples were requested from us.
-  double current_ratio = static_cast<double>(num_in) / static_cast<double>(num_out);
+  // Calculate maximum allowable backlog based on configured maximum latency.
+  const double max_latency = Config::Get(Config::MAIN_AUDIO_STRETCH_LATENCY) / 1000.0;
+  const double max_backlog = m_sample_rate * max_latency;  // Max number of samples in the backlog.
+  const double num_samples = m_sound_touch.numSamples();

-  const double max_latency = Config::Get(Config::MAIN_AUDIO_STRETCH_LATENCY);
-  const double max_backlog = m_sample_rate * max_latency / 1000.0 / m_stretch_ratio;
-  const double backlog_fullness = m_sound_touch.numSamples() / max_backlog;
-  if (backlog_fullness > 5.0)
-  {
-    // Too many samples in backlog: Don't push anymore on
+  // Prevent backlog from growing too large.
+  if (num_samples >= 5.0 * max_backlog)
    num_in = 0;
-  }

-  // We ideally want the backlog to be about 50% full.
-  // This gives some headroom both ways to prevent underflow and overflow.
-  // We tweak current_ratio to encourage this.
-  constexpr double tweak_time_scale = 0.5;  // seconds
-  current_ratio *= 1.0 + 2.0 * (backlog_fullness - 0.5) * (time_delta / tweak_time_scale);
+  // Target for backlog to be about 50% full to allow flexibility.
+  const double low_watermark = 0.5 * max_backlog;
+  const double requested_samples = m_stretch_ratio * num_out;
+  const double num_left = num_samples - requested_samples;

-  // This low-pass filter smoothes out variance in the calculated stretch ratio.
-  // The time-scale determines how responsive this filter is.
-  constexpr double lpf_time_scale = 1.0;  // seconds
-  const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
+  // Adjustment parameters similar to those used in Mixer for pitch adjustment.
+  const double lpf_time_scale = 1.0 / 16.0;
+  const double control_effort = 1.0 / 16.0;
+  current_ratio *= 1.0 + control_effort * (num_left - low_watermark) / requested_samples;
+  current_ratio = std::clamp(current_ratio, 0.1, 10.0);
+
+  // Calculate target sample ratio and apply low-pass filter to smooth changes.
+  const double lpf_gain = -std::expm1(-time_delta / lpf_time_scale);
  m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);
-
-  // Place a lower limit of 10% speed.  When a game boots up, there will be
-  // many silence samples.  These do not need to be timestretched.
-  m_stretch_ratio = std::max(m_stretch_ratio, 0.1);
  m_sound_touch.setTempo(m_stretch_ratio);

-  DEBUG_LOG_FMT(AUDIO, "Audio stretching: samples:{}/{} ratio:{} backlog:{} gain: {}", num_in,
-                num_out, m_stretch_ratio, backlog_fullness, lpf_gain);
+  // Debug log to monitor stretching stats.
+  DEBUG_LOG_FMT(AUDIO, "Audio stretching: samples:{}/{} ratio:{} backlog:{} gain:{}", num_in,
+                num_out, m_stretch_ratio, num_samples / max_backlog, lpf_gain);

+  // Add new samples to the processor for stretching.
  m_sound_touch.putSamples(in, num_in);
 }

-void AudioStretcher::GetStretchedSamples(short* out, unsigned int num_out)
+void AudioStretcher::GetStretchedSamples(s16* out, u32 num_out)
 {
  const size_t samples_received = m_sound_touch.receiveSamples(out, num_out);

@ -82,4 +81,10 @@ void AudioStretcher::GetStretchedSamples(short* out, unsigned int num_out)
  }
 }

+// Returns the SoundTouch backlog expressed as a duration: the number of buffered samples
+// divided by m_sample_rate. (Assumes DT_s is a seconds-based duration - TODO confirm.)
+DT AudioStretcher::AvailableSamplesTime() const
+{
+  const u32 queued_samples = m_sound_touch.numSamples();
+  const auto queued_time = DT_s(queued_samples) / m_sample_rate;
+  return std::chrono::duration_cast<DT>(queued_time);
+}
+
 }  // namespace AudioCommon
--- a/Source/Core/AudioCommon/AudioStretcher.h
+++ b/Source/Core/AudioCommon/AudioStretcher.h
@ -7,19 +7,23 @@

 #include <SoundTouch.h>

+#include "Common/CommonTypes.h"
+
 namespace AudioCommon
 {
+// Wraps a soundtouch::SoundTouch instance and adjusts its tempo so that buffered audio can be
+// played back at a rate that differs from the rate at which it is supplied.
 class AudioStretcher
 {
 public:
-  explicit AudioStretcher(unsigned int sample_rate);
-  void ProcessSamples(const short* in, unsigned int num_in, unsigned int num_out);
-  void GetStretchedSamples(short* out, unsigned int num_out);
+  // sample_rate is stored in m_sample_rate and handed to SoundTouch::setSampleRate.
+  explicit AudioStretcher(u64 sample_rate);
+  // Updates the stretch ratio from num_in/num_out and the current backlog, then queues `in`.
+  void ProcessSamples(const s16* in, u32 num_in, u32 num_out);
+  // Pulls up to num_out stretched samples out of SoundTouch into `out`.
+  void GetStretchedSamples(s16* out, u32 num_out);
+  // Clears the SoundTouch instance.
  void Clear();

+  // Number of samples currently buffered in SoundTouch, converted to a duration.
+  DT AvailableSamplesTime() const;
+
 private:
-  unsigned int m_sample_rate;
-  std::array<short, 2> m_last_stretched_sample = {};
+  u64 m_sample_rate;
+  // NOTE(review): presumably the last stereo sample produced, kept for padding - confirm in
+  // GetStretchedSamples.
+  std::array<s16, 2> m_last_stretched_sample = {};
  soundtouch::SoundTouch m_sound_touch;
+  // Tempo most recently passed to SoundTouch::setTempo; starts at 1.0.
  double m_stretch_ratio = 1.0;
 };
--- a/Source/Core/AudioCommon/Mixer.cpp
+++ b/Source/Core/AudioCommon/Mixer.cpp
@ -7,6 +7,8 @@
 #include <cmath>
 #include <cstring>

+#include <Core/Core.h>
+
 #include "AudioCommon/Enums.h"
 #include "Common/ChunkFile.h"
 #include "Common/CommonTypes.h"
@ -31,7 +33,7 @@ static u32 DPL2QualityToFrameBlockSize(AudioCommon::DPL2Quality quality)
  }
 }

-Mixer::Mixer(unsigned int BackendSampleRate)
+Mixer::Mixer(u32 BackendSampleRate)
    : m_sampleRate(BackendSampleRate), m_stretcher(BackendSampleRate),
      m_surround_decoder(BackendSampleRate,
                         DPL2QualityToFrameBlockSize(Config::Get(Config::MAIN_DPL2_QUALITY)))
@ -57,119 +59,164 @@ void Mixer::DoState(PointerWrap& p)
    mixer.DoState(p);
 }

-// Executed from sound stream thread
-unsigned int Mixer::MixerFifo::Mix(short* samples, unsigned int numSamples,
-                                   bool consider_framelimit, float emulationspeed,
-                                   int timing_variance)
+// Interpolates one stereo sample at position (index + frac) in the circular buffer using a
+// windowed sinc filter that spans the taps index + 1 - width .. index + width.
+//   index      - integer read position in m_buffer (wrapped with INDEX_MASK).
+//   frac       - fractional offset from index at which the filter is centred.
+//   sinc_ratio - scales the sinc argument and gain; callers pass 1 / max(1, resampling ratio).
+// Returns the pair (left, right) in the same order as the pairs stored in m_buffer.
+std::pair<float, float> Mixer::MixerFifo::GetSample(u32 index, float frac, float sinc_ratio)
 {
-  unsigned int currentSample = 0;
+  constexpr float pi = 3.14159265358979323846f;
+  const s32 sinc_window_width = m_mixer->m_config_sinc_window_width;

-  // Cache access in non-volatile variable
-  // This is the only function changing the read value, so it's safe to
-  // cache it locally although it's written here.
-  // The writing pointer will be modified outside, but it will only increase,
-  // so we will just ignore new written data while interpolating.
-  // Without this cache, the compiler wouldn't be allowed to optimize the
-  // interpolation loop.
-  u32 indexR = m_indexR.load();
-  u32 indexW = m_indexW.load();
-
-  // render numleft sample pairs to samples[]
-  // advance indexR with sample position
-  // remember fractional offset
-
-  float aid_sample_rate =
-      FIXED_SAMPLE_RATE_DIVIDEND / static_cast<float>(m_input_sample_rate_divisor);
-  if (consider_framelimit && emulationspeed > 0.0f)
+  float l_sample = 0.0f, r_sample = 0.0f;
+  for (s32 i = 1 - sinc_window_width; i <= sinc_window_width; ++i)
  {
-    float numLeft = static_cast<float>(((indexW - indexR) & INDEX_MASK) / 2);
+    const auto [l, r] = m_buffer[(index + i) & INDEX_MASK];

-    u32 low_watermark = (FIXED_SAMPLE_RATE_DIVIDEND * timing_variance) /
-                        (static_cast<u64>(m_input_sample_rate_divisor) * 1000);
-    low_watermark = std::min(low_watermark, MAX_SAMPLES / 2);
+    // x is the distance of this tap from the interpolation point, in units of pi.
+    const float x = pi * (i - frac);
+    // Raised-cosine window (Hamming coefficients) over the half-width sinc_window_width.
+    float weight = 0.53836f + 0.46164f * std::cos(x / sinc_window_width);
+    // sin(x * r) / x tends to r as x -> 0, so use that limit near the centre tap to avoid 0/0.
+    if (std::abs(x) >= 1e-6f)
+      weight *= std::sin(x * sinc_ratio) / x;
+    else
+      weight *= sinc_ratio;

-    m_numLeftI = (numLeft + m_numLeftI * (CONTROL_AVG - 1)) / CONTROL_AVG;
-    float offset = (m_numLeftI - low_watermark) * CONTROL_FACTOR;
-    if (offset > MAX_FREQ_SHIFT)
-      offset = MAX_FREQ_SHIFT;
-    if (offset < -MAX_FREQ_SHIFT)
-      offset = -MAX_FREQ_SHIFT;
-
-    aid_sample_rate = (aid_sample_rate + offset) * emulationspeed;
+    r_sample += weight * r;
+    l_sample += weight * l;
  }

-  const u32 ratio = (u32)(65536.0f * aid_sample_rate / (float)m_mixer->m_sampleRate);
+  return std::make_pair(l_sample, r_sample);
+}

-  s32 lvolume = m_LVolume.load();
-  s32 rvolume = m_RVolume.load();
+// Executed from sound stream thread
+// Resamples this FIFO into `samples` (interleaved stereo, added on top of what is already
+// there) while nudging the playback pitch so the FIFO fill level stays near the low watermark.
+//   samples      - output buffer holding num_samples stereo pairs; contributions are summed in.
+//   num_samples  - number of stereo pairs to produce.
+//   target_speed - playback speed factor applied to the input/output sample-rate ratio.
+void Mixer::MixerFifo::Mix(s16* samples, u32 num_samples, double target_speed)
+{
+  // Cache access in non-volatile variable. No race conditions are here, as indexW only increases
+  // (which is fine), and this is the only place indexR is written to.
+  const u32 indexW = m_indexW.load();  // Write index in circular buffer
+  u32 indexR = m_indexR.load();        // Read index in circular buffer

-  const auto read_buffer = [this](auto index) {
-    return m_little_endian ? m_buffer[index] : Common::swap16(m_buffer[index]);
-  };
+  const float l_volume = float(m_LVolume.load()) / 256.0f;
+  const float r_volume = float(m_RVolume.load()) / 256.0f;

-  // TODO: consider a higher-quality resampling algorithm.
-  for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2)
+  auto clamp = [](s32 input) -> s16 { return static_cast<s16>(std::clamp(input, -32768, 32767)); };
+
+  const double ratio = (target_speed * FIXED_SAMPLE_RATE_DIVIDEND) /
+                       (m_input_sample_rate_divisor * m_mixer->m_sampleRate);
+
+  // These are timing markers. We want to keep the audio buffer around the low watermark.
+  const s32 direct_latency = m_mixer->m_config_direct_latency;
+  const s32 low_watermark = std::min<s32>(MAX_SAMPLES, direct_latency * FIXED_SAMPLE_RATE_DIVIDEND /
+                                                           (m_input_sample_rate_divisor * 1000));
+
+  // Calculate the number of samples remaining in the buffer
+  const s32 sinc_window_width = m_mixer->m_config_sinc_window_width;
+  s32 remaining_samples = s32((indexW - indexR) & INDEX_MASK) - sinc_window_width - 2;
+
+  // Calculate the speed at which to play the requested samples in order to maintain the correct
+  // synchronization with the video stream. Done by slightly adjusting the pitch of the audio.
+  const double requested_samples = m_multiplier * num_samples * ratio;
+  // When nothing is requested (num_samples == 0 or a zero ratio) and the buffer sits exactly on
+  // the watermark, the quotient is 0/0 = NaN. std::clamp passes NaN through, and it would then
+  // stick in m_multiplier and m_frac. Fall back to "no correction" in that case.
+  double multiplier = 1.0;
+  if (requested_samples > 0.0)
+    multiplier = (remaining_samples - low_watermark) / requested_samples;
+  multiplier = 1.0 + CONTROL_EFFORT * (multiplier - 1.0);
+  multiplier = std::clamp(multiplier, MIN_PITCH_SHIFT, MAX_PITCH_SHIFT);
+
+  // If the number of samples is above 2x the low watermark, skip ahead to prevent excessive latency
+  const double max_latency = 2.0 * low_watermark;
+  if (remaining_samples - requested_samples >= max_latency)
  {
-    u32 indexR2 = indexR + 2;  // next sample
+    const s32 jump = 1 + static_cast<s32>(remaining_samples - requested_samples - max_latency);
+    remaining_samples -= jump;
+    indexR += jump;
+  }

-    s16 l1 = read_buffer(indexR & INDEX_MASK);   // current
-    s16 l2 = read_buffer(indexR2 & INDEX_MASK);  // next
-    int sampleL = ((l1 << 16) + (l2 - l1) * (u16)m_frac) >> 16;
-    sampleL = (sampleL * lvolume) >> 8;
-    sampleL += samples[currentSample + 1];
-    samples[currentSample + 1] = std::clamp(sampleL, -32767, 32767);
+  // Low-pass filter to smooth out the multiplier (prevents sudden jumps in pitch)
+  const double lpf_gain = -std::expm1(-1.0 / (num_samples * SAMPLE_RATE_LPF));
+  const float sinc_ratio = 1.0f / std::max(1.0f, float(ratio * std::max(m_multiplier, multiplier)));

-    s16 r1 = read_buffer((indexR + 1) & INDEX_MASK);   // current
-    s16 r2 = read_buffer((indexR2 + 1) & INDEX_MASK);  // next
-    int sampleR = ((r1 << 16) + (r2 - r1) * (u16)m_frac) >> 16;
-    sampleR = (sampleR * rvolume) >> 8;
-    sampleR += samples[currentSample];
-    samples[currentSample] = std::clamp(sampleR, -32767, 32767);
+  bool reversed = false;
+  for (u32 sample = 0; sample < num_samples; ++sample)
+  {
+    const auto [l, r] = GetSample(indexR, m_frac, sinc_ratio);
+    samples[2 * sample + 0] = clamp(samples[2 * sample + 0] + s32(r * r_volume));
+    samples[2 * sample + 1] = clamp(samples[2 * sample + 1] + s32(l * l_volume));
+
+    // Smooth out multiplier with low pass filter
+    m_multiplier += lpf_gain * (multiplier - m_multiplier);
+
+    // Determine if we need to reverse the audio (i.e. we have run out of samples)
+    reversed |= remaining_samples <= 0;
+    reversed &= remaining_samples <= low_watermark;
+
+    // Manage fractional index and increment index.
+    m_frac += m_multiplier * (reversed ? -ratio : +ratio);
+    const s32 inc = static_cast<s32>(m_frac);
+    remaining_samples -= inc;
+    indexR += inc;
+    m_frac -= inc;
+  }
+
+  // Flush cached variable
+  m_indexR.store(indexR);
+}
+
+// This is for audio stretching, which requires no playback speed shenanigans
+// Resamples this FIFO at its nominal rate and adds the result into `samples` (interleaved
+// stereo). Stops early when the FIFO runs dry; the rest of the request is then padded with this
+// FIFO's last contribution. Returns the number of stereo pairs that were actually resampled.
+u32 Mixer::MixerFifo::MixRaw(s16* samples, u32 num_samples)
+{
+  // Cache access in non-volatile variable. No race conditions are here, as indexW only increases
+  // (which is fine), and this is the only place indexR is written to.
+  const u32 indexW = m_indexW.load();  // Write index in circular buffer
+  u32 indexR = m_indexR.load();        // Read index in circular buffer
+
+  const float l_volume = float(m_LVolume.load()) / 256.0f;
+  const float r_volume = float(m_RVolume.load()) / 256.0f;
+
+  auto clamp = [](s32 input) -> s16 { return static_cast<s16>(std::clamp(input, -32768, 32767)); };
+
+  const double aid_sample_rate = FIXED_SAMPLE_RATE_DIVIDEND / double(m_input_sample_rate_divisor);
+  const double ratio = aid_sample_rate / m_mixer->m_sampleRate;
+  const float sinc_ratio = 1.0f / std::max(1.0f, static_cast<float>(ratio));
+
+  // Calculate the number of samples remaining in the buffer
+  const s32 sinc_window_width = m_mixer->m_config_sinc_window_width;
+  s32 remaining_samples = s32((indexW - indexR) & INDEX_MASK) - sinc_window_width - 2;
+  u32 sample = 0;
+
+  // This FIFO's most recent volume-scaled contribution, kept for padding after an underrun.
+  s32 held_l = 0;
+  s32 held_r = 0;
+
+  for (; sample < num_samples && 0 < remaining_samples; ++sample)
+  {
+    const auto [l, r] = GetSample(indexR, m_frac, sinc_ratio);
+    held_r = s32(r * r_volume);
+    held_l = s32(l * l_volume);
+    samples[2 * sample + 0] = clamp(samples[2 * sample + 0] + held_r);
+    samples[2 * sample + 1] = clamp(samples[2 * sample + 1] + held_l);

    m_frac += ratio;
-    indexR += 2 * (u16)(m_frac >> 16);
-    m_frac &= 0xffff;
+    const s32 inc = static_cast<s32>(m_frac);
+    remaining_samples -= inc;
+    indexR += inc;
+    m_frac -= inc;
  }

-  // Actual number of samples written to the buffer without padding.
-  unsigned int actual_sample_count = currentSample / 2;
-
-  // Padding
-  short s[2];
-  s[0] = read_buffer((indexR - 1) & INDEX_MASK);
-  s[1] = read_buffer((indexR - 2) & INDEX_MASK);
-  s[0] = (s[0] * rvolume) >> 8;
-  s[1] = (s[1] * lvolume) >> 8;
-  for (; currentSample < numSamples * 2; currentSample += 2)
+  if (sample != 0)
  {
-    int sampleR = std::clamp(s[0] + samples[currentSample + 0], -32767, 32767);
-    int sampleL = std::clamp(s[1] + samples[currentSample + 1], -32767, 32767);
-
-    samples[currentSample + 0] = sampleR;
-    samples[currentSample + 1] = sampleL;
+    // Pad by ADDING the held value: `samples` is shared with the other FIFOs, so copying the
+    // already-mixed pair at sample - 1 over the tail would wipe out what they mixed in there.
+    for (u32 padding = sample; padding < num_samples; ++padding)
+    {
+      samples[2 * padding + 0] = clamp(samples[2 * padding + 0] + held_r);
+      samples[2 * padding + 1] = clamp(samples[2 * padding + 1] + held_l);
+    }
  }

  // Flush cached variable
  m_indexR.store(indexR);

-  return actual_sample_count;
+  return sample;
 }

-unsigned int Mixer::Mix(short* samples, unsigned int num_samples)
+u32 Mixer::Mix(s16* samples, u32 num_samples)
 {
  if (!samples)
    return 0;

-  memset(samples, 0, num_samples * 2 * sizeof(short));
+  memset(samples, 0, num_samples * 2 * sizeof(s16));

-  // TODO: Determine how emulation speed will be used in audio
-  // const float emulation_speed = g_perf_metrics.GetSpeed();
-  const float emulation_speed = m_config_emulation_speed;
-  const int timing_variance = m_config_timing_variance;
  if (m_config_audio_stretch)
  {
-    unsigned int available_samples =
-        std::min(m_dma_mixer.AvailableSamples(), m_streaming_mixer.AvailableSamples());
+    // Pull as many samples as both the DMA and streaming FIFOs can supply.  Usually they hold the
+    // same number of samples; if not, std::min limits the request to the emptier of the two.
+    const u32 available_samples =
+        std::min({m_dma_mixer.AvailableSamples(), m_streaming_mixer.AvailableSamples()});

    ASSERT_MSG(AUDIO, available_samples <= MAX_SAMPLES,
               "Audio stretching would overflow m_scratch_buffer: min({}, {}) -> {} > {} ({})",
@ -178,43 +225,47 @@ unsigned int Mixer::Mix(short* samples, unsigned int num_samples)

    m_scratch_buffer.fill(0);

-    m_dma_mixer.Mix(m_scratch_buffer.data(), available_samples, false, emulation_speed,
-                    timing_variance);
-    m_streaming_mixer.Mix(m_scratch_buffer.data(), available_samples, false, emulation_speed,
-                          timing_variance);
-    m_wiimote_speaker_mixer.Mix(m_scratch_buffer.data(), available_samples, false, emulation_speed,
-                                timing_variance);
-    m_skylander_portal_mixer.Mix(m_scratch_buffer.data(), available_samples, false, emulation_speed,
-                                 timing_variance);
+    m_dma_mixer.MixRaw(m_scratch_buffer.data(), available_samples);
+    m_streaming_mixer.MixRaw(m_scratch_buffer.data(), available_samples);
+    m_wiimote_speaker_mixer.MixRaw(m_scratch_buffer.data(), available_samples);
+    m_skylander_portal_mixer.MixRaw(m_scratch_buffer.data(), available_samples);
+
    for (auto& mixer : m_gba_mixers)
-    {
-      mixer.Mix(m_scratch_buffer.data(), available_samples, false, emulation_speed,
-                timing_variance);
-    }
+      mixer.MixRaw(m_scratch_buffer.data(), available_samples);

    if (!m_is_stretching)
    {
      m_stretcher.Clear();
      m_is_stretching = true;
    }
+
    m_stretcher.ProcessSamples(m_scratch_buffer.data(), available_samples, num_samples);
    m_stretcher.GetStretchedSamples(samples, num_samples);
+
+    g_perf_metrics.CountAudioLatency(m_stretcher.AvailableSamplesTime() +
+                                     m_dma_mixer.AvailableSamplesTime());
  }
  else
  {
-    m_dma_mixer.Mix(samples, num_samples, true, emulation_speed, timing_variance);
-    m_streaming_mixer.Mix(samples, num_samples, true, emulation_speed, timing_variance);
-    m_wiimote_speaker_mixer.Mix(samples, num_samples, true, emulation_speed, timing_variance);
-    m_skylander_portal_mixer.Mix(samples, num_samples, true, emulation_speed, timing_variance);
+    float target_speed = m_config_emulation_speed;
+    if (target_speed <= 0.0f || Core::GetIsThrottlerTempDisabled())
+      target_speed = g_perf_metrics.GetAudioSpeed();
+
+    m_dma_mixer.Mix(samples, num_samples, target_speed);
+    m_streaming_mixer.Mix(samples, num_samples, target_speed);
+    m_wiimote_speaker_mixer.Mix(samples, num_samples, target_speed);
+    m_skylander_portal_mixer.Mix(samples, num_samples, target_speed);
    for (auto& mixer : m_gba_mixers)
-      mixer.Mix(samples, num_samples, true, emulation_speed, timing_variance);
+      mixer.Mix(samples, num_samples, target_speed);
+
+    g_perf_metrics.CountAudioLatency(m_dma_mixer.AvailableSamplesTime());
    m_is_stretching = false;
  }

  return num_samples;
 }

-unsigned int Mixer::MixSurround(float* samples, unsigned int num_samples)
+u32 Mixer::MixSurround(float* samples, u32 num_samples)
 {
  if (!num_samples)
    return 0;
@ -243,65 +294,62 @@ unsigned int Mixer::MixSurround(float* samples, unsigned int num_samples)
  return num_samples;
 }

-void Mixer::MixerFifo::PushSamples(const short* samples, unsigned int num_samples)
+// Appends up to num_samples stereo pairs from `samples` to the circular buffer, converting them
+// to float (and byte-swapping first when the FIFO is not little-endian). Pairs that do not fit
+// are dropped.
+void Mixer::MixerFifo::PushSamples(const s16* samples, u32 num_samples)
 {
  // Cache access in non-volatile variable
-  // indexR isn't allowed to cache in the audio throttling loop as it
-  // needs to get updates to not deadlock.
  u32 indexW = m_indexW.load();

-  // Check if we have enough free space
-  // indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW
-  if (num_samples * 2 + ((indexW - m_indexR.load()) & INDEX_MASK) >= MAX_SAMPLES * 2)
-    return;
+  // prevent writing into the buffer if it's full
+  // The sinc_window_width slots behind the read index are kept out of the writable range.
+  const s32 sinc_window_width = m_mixer->m_config_sinc_window_width;
+  const s32 writable_samples =
+      static_cast<s32>((m_indexR.load() - sinc_window_width - indexW) & INDEX_MASK);
+  num_samples = std::min(num_samples, static_cast<u32>(std::max(0, writable_samples)));
+
+  // We do float conversion here to avoid doing it in the mixing loop, which is performance
+  // critical. That way we only need to do it once per sample.
+  if (m_little_endian)
+    for (u32 i = 0; i < num_samples; ++i, ++indexW)
+      m_buffer[indexW & INDEX_MASK] = std::make_pair(static_cast<float>(samples[i * 2 + 0]),
+                                                     static_cast<float>(samples[i * 2 + 1]));

-  // AyuanX: Actual re-sampling work has been moved to sound thread
-  // to alleviate the workload on main thread
-  // and we simply store raw data here to make fast mem copy
-  int over_bytes = num_samples * 4 - (MAX_SAMPLES * 2 - (indexW & INDEX_MASK)) * sizeof(short);
-  if (over_bytes > 0)
-  {
-    memcpy(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 4 - over_bytes);
-    memcpy(&m_buffer[0], samples + (num_samples * 4 - over_bytes) / sizeof(short), over_bytes);
-  }
  else
-  {
-    memcpy(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 4);
-  }
+    for (u32 i = 0; i < num_samples; ++i, ++indexW)
+      m_buffer[indexW & INDEX_MASK] =
+          std::make_pair(static_cast<float>(static_cast<s16>(Common::swap16(samples[i * 2 + 0]))),
+                         static_cast<float>(static_cast<s16>(Common::swap16(samples[i * 2 + 1]))));

-  m_indexW.fetch_add(num_samples * 2);
+  // Publish the new write index only after every pair has been stored.
+  m_indexW.store(indexW);
 }

-void Mixer::PushSamples(const short* samples, unsigned int num_samples)
+// Pushes samples into the DMA FIFO and, when DSP audio logging is enabled, also hands them to
+// the DSP wave writer together with the FIFO's current divisor and volume.
+void Mixer::PushSamples(const s16* samples, u32 num_samples)
 {
  m_dma_mixer.PushSamples(samples, num_samples);
  if (m_log_dsp_audio)
  {
-    int sample_rate_divisor = m_dma_mixer.GetInputSampleRateDivisor();
+    u32 sample_rate_divisor = m_dma_mixer.GetInputSampleRateDivisor();
    auto volume = m_dma_mixer.GetVolume();
    m_wave_writer_dsp.AddStereoSamplesBE(samples, num_samples, sample_rate_divisor, volume.first,
                                         volume.second);
  }
 }

-void Mixer::PushStreamingSamples(const short* samples, unsigned int num_samples)
+// Pushes samples into the streaming FIFO and, when DTK audio logging is enabled, also hands
+// them to the DTK wave writer together with the FIFO's current divisor and volume.
+void Mixer::PushStreamingSamples(const s16* samples, u32 num_samples)
 {
  m_streaming_mixer.PushSamples(samples, num_samples);
  if (m_log_dtk_audio)
  {
-    int sample_rate_divisor = m_streaming_mixer.GetInputSampleRateDivisor();
+    u32 sample_rate_divisor = m_streaming_mixer.GetInputSampleRateDivisor();
    auto volume = m_streaming_mixer.GetVolume();
    m_wave_writer_dtk.AddStereoSamplesBE(samples, num_samples, sample_rate_divisor, volume.first,
                                         volume.second);
  }
 }

-void Mixer::PushWiimoteSpeakerSamples(const short* samples, unsigned int num_samples,
-                                      unsigned int sample_rate_divisor)
+void Mixer::PushWiimoteSpeakerSamples(const s16* samples, u32 num_samples, u32 sample_rate_divisor)
 {
  // Max 20 bytes/speaker report, may be 4-bit ADPCM so multiply by 2
  static constexpr u32 MAX_SPEAKER_SAMPLES = 20 * 2;
-  std::array<short, MAX_SPEAKER_SAMPLES * 2> samples_stereo;
+  std::array<s16, MAX_SPEAKER_SAMPLES * 2> samples_stereo;

  ASSERT_MSG(AUDIO, num_samples <= MAX_SPEAKER_SAMPLES,
             "num_samples would overflow samples_stereo: {} > {}", num_samples,
@ -310,9 +358,9 @@ void Mixer::PushWiimoteSpeakerSamples(const short* samples, unsigned int num_sam
  {
    m_wiimote_speaker_mixer.SetInputSampleRateDivisor(sample_rate_divisor);

-    for (unsigned int i = 0; i < num_samples; ++i)
+    for (u32 i = 0; i < num_samples; ++i)
    {
-      samples_stereo[i * 2] = samples[i];
+      samples_stereo[i * 2 + 0] = samples[i];
      samples_stereo[i * 2 + 1] = samples[i];
    }

@ -320,12 +368,12 @@ void Mixer::PushWiimoteSpeakerSamples(const short* samples, unsigned int num_sam
  }
 }

-void Mixer::PushSkylanderPortalSamples(const u8* samples, unsigned int num_samples)
+void Mixer::PushSkylanderPortalSamples(const u8* samples, u32 num_samples)
 {
  // Skylander samples are always supplied as 64 bytes, 32 x 16 bit samples
  // The portal speaker is 1 channel, so duplicate and play as stereo audio
  static constexpr u32 MAX_PORTAL_SPEAKER_SAMPLES = 32;
-  std::array<short, MAX_PORTAL_SPEAKER_SAMPLES * 2> samples_stereo;
+  std::array<s16, MAX_PORTAL_SPEAKER_SAMPLES * 2> samples_stereo;

  ASSERT_MSG(AUDIO, num_samples <= MAX_PORTAL_SPEAKER_SAMPLES,
             "num_samples is not less or equal to 32: {} > {}", num_samples,
@ -333,10 +381,10 @@ void Mixer::PushSkylanderPortalSamples(const u8* samples, unsigned int num_sampl

  if (num_samples <= MAX_PORTAL_SPEAKER_SAMPLES)
  {
-    for (unsigned int i = 0; i < num_samples; ++i)
+    for (u32 i = 0; i < num_samples; ++i)
    {
      s16 sample = static_cast<u16>(samples[i * 2 + 1]) << 8 | static_cast<u16>(samples[i * 2]);
-      samples_stereo[i * 2] = sample;
+      samples_stereo[i * 2 + 0] = sample;
      samples_stereo[i * 2 + 1] = sample;
    }

@ -344,37 +392,37 @@ void Mixer::PushSkylanderPortalSamples(const u8* samples, unsigned int num_sampl
  }
 }

-void Mixer::PushGBASamples(int device_number, const short* samples, unsigned int num_samples)
+// Pushes samples into the FIFO of the GBA selected by device_number (used as an unchecked
+// index into m_gba_mixers).
+void Mixer::PushGBASamples(int device_number, const s16* samples, u32 num_samples)
 {
  m_gba_mixers[device_number].PushSamples(samples, num_samples);
 }

-void Mixer::SetDMAInputSampleRateDivisor(unsigned int rate_divisor)
+// Forwards rate_divisor to the DMA FIFO.
+void Mixer::SetDMAInputSampleRateDivisor(u32 rate_divisor)
 {
  m_dma_mixer.SetInputSampleRateDivisor(rate_divisor);
 }

-void Mixer::SetStreamInputSampleRateDivisor(unsigned int rate_divisor)
+// Forwards rate_divisor to the streaming FIFO.
+void Mixer::SetStreamInputSampleRateDivisor(u32 rate_divisor)
 {
  m_streaming_mixer.SetInputSampleRateDivisor(rate_divisor);
 }

-void Mixer::SetGBAInputSampleRateDivisors(int device_number, unsigned int rate_divisor)
+// Forwards rate_divisor to the FIFO of the GBA selected by device_number (unchecked index).
+void Mixer::SetGBAInputSampleRateDivisors(int device_number, u32 rate_divisor)
 {
  m_gba_mixers[device_number].SetInputSampleRateDivisor(rate_divisor);
 }

-void Mixer::SetStreamingVolume(unsigned int lvolume, unsigned int rvolume)
+// Forwards the left/right volume to the streaming FIFO.
+void Mixer::SetStreamingVolume(u32 lvolume, u32 rvolume)
 {
  m_streaming_mixer.SetVolume(lvolume, rvolume);
 }

-void Mixer::SetWiimoteSpeakerVolume(unsigned int lvolume, unsigned int rvolume)
+// Forwards the left/right volume to the Wiimote speaker FIFO.
+void Mixer::SetWiimoteSpeakerVolume(u32 lvolume, u32 rvolume)
 {
  m_wiimote_speaker_mixer.SetVolume(lvolume, rvolume);
 }

-void Mixer::SetGBAVolume(int device_number, unsigned int lvolume, unsigned int rvolume)
+// Forwards the left/right volume to the FIFO of the GBA selected by device_number (unchecked
+// index).
+void Mixer::SetGBAVolume(int device_number, u32 lvolume, u32 rvolume)
 {
  m_gba_mixers[device_number].SetVolume(lvolume, rvolume);
 }
@ -456,8 +504,9 @@ void Mixer::StopLogDSPAudio()
+// Re-reads the mixer's cached settings (emulation speed, audio stretching, direct latency and
+// sinc window width) from Config.
 void Mixer::RefreshConfig()
 {
  m_config_emulation_speed = Config::Get(Config::MAIN_EMULATION_SPEED);
-  m_config_timing_variance = Config::Get(Config::MAIN_TIMING_VARIANCE);
  m_config_audio_stretch = Config::Get(Config::MAIN_AUDIO_STRETCH);
+  m_config_direct_latency = Config::Get(Config::MAIN_AUDIO_DIRECT_LATENCY);
+  m_config_sinc_window_width = Config::Get(Config::MAIN_AUDIO_SINC_WINDOW_WIDTH);
 }

 void Mixer::MixerFifo::DoState(PointerWrap& p)
@ -467,17 +516,17 @@ void Mixer::MixerFifo::DoState(PointerWrap& p)
  p.Do(m_RVolume);
 }

-void Mixer::MixerFifo::SetInputSampleRateDivisor(unsigned int rate_divisor)
+// Stores the divisor; the mixing code derives this FIFO's input sample rate as
+// FIXED_SAMPLE_RATE_DIVIDEND / divisor.
+void Mixer::MixerFifo::SetInputSampleRateDivisor(u64 rate_divisor)
 {
  m_input_sample_rate_divisor = rate_divisor;
 }

-unsigned int Mixer::MixerFifo::GetInputSampleRateDivisor() const
+// Returns the divisor last stored by SetInputSampleRateDivisor (or the constructor).
+u64 Mixer::MixerFifo::GetInputSampleRateDivisor() const
 {
  return m_input_sample_rate_divisor;
 }

-void Mixer::MixerFifo::SetVolume(unsigned int lvolume, unsigned int rvolume)
+void Mixer::MixerFifo::SetVolume(s32 lvolume, s32 rvolume)
 {
  m_LVolume.store(lvolume + (lvolume >> 7));
  m_RVolume.store(rvolume + (rvolume >> 7));
@ -488,11 +537,26 @@ std::pair<s32, s32> Mixer::MixerFifo::GetVolume() const
  return std::make_pair(m_LVolume.load(), m_RVolume.load());
 }

-unsigned int Mixer::MixerFifo::AvailableSamples() const
+// Returns how many buffered input samples can be consumed: the distance between the write and
+// read indices minus the sinc window width and a margin of 4, floored at 0.
+// NOTE(review): Mix/MixRaw reserve a margin of 2 rather than 4 - confirm the extra slack is
+// intentional.
+u32 Mixer::MixerFifo::AvailableFIFOSamples() const
 {
-  unsigned int samples_in_fifo = ((m_indexW.load() - m_indexR.load()) & INDEX_MASK) / 2;
-  if (samples_in_fifo <= 1)
-    return 0;  // Mixer::MixerFifo::Mix always keeps one sample in the buffer.
-  return (samples_in_fifo - 1) * static_cast<u64>(m_mixer->m_sampleRate) *
-         m_input_sample_rate_divisor / FIXED_SAMPLE_RATE_DIVIDEND;
+  const s32 samples_in_fifo = static_cast<s32>((m_indexW.load() - m_indexR.load()) & INDEX_MASK) -
+                              m_mixer->m_config_sinc_window_width - 4;
+
+  if (samples_in_fifo <= 0)
+    return 0;
+
+  return static_cast<u32>(samples_in_fifo);
+}
+
+// Converts the number of consumable FIFO samples into the equivalent number of samples at the
+// mixer's sample rate: fifo_samples * m_sampleRate * divisor / FIXED_SAMPLE_RATE_DIVIDEND.
+u32 Mixer::MixerFifo::AvailableSamples() const
+{
+  const s64 fifo_samples = static_cast<s64>(AvailableFIFOSamples());
+  const auto scaled = fifo_samples * m_mixer->m_sampleRate * m_input_sample_rate_divisor;
+  return static_cast<u32>(scaled / FIXED_SAMPLE_RATE_DIVIDEND);
+}
+
+// Returns the consumable FIFO samples as a duration: samples * divisor /
+// FIXED_SAMPLE_RATE_DIVIDEND. (Assumes DT_s is a seconds-based duration - TODO confirm.)
+DT Mixer::MixerFifo::AvailableSamplesTime() const
+{
+  const auto scaled = DT_s(AvailableFIFOSamples()) * m_input_sample_rate_divisor;
+  return std::chrono::duration_cast<DT>(scaled / FIXED_SAMPLE_RATE_DIVIDEND);
 }
--- a/Source/Core/AudioCommon/Mixer.h
+++ b/Source/Core/AudioCommon/Mixer.h
@ -5,6 +5,7 @@

 #include <array>
 #include <atomic>
+#include <optional>

 #include "AudioCommon/AudioStretcher.h"
 #include "AudioCommon/SurroundDecoder.h"
@ -17,32 +18,31 @@ class PointerWrap;
 class Mixer final
 {
 public:
-  explicit Mixer(unsigned int BackendSampleRate);
+  explicit Mixer(u32 BackendSampleRate);
  ~Mixer();

  void DoState(PointerWrap& p);

  // Called from audio threads
-  unsigned int Mix(short* samples, unsigned int numSamples);
-  unsigned int MixSurround(float* samples, unsigned int num_samples);
+  u32 Mix(s16* samples, u32 num_samples);
+  u32 MixSurround(float* samples, u32 num_samples);

  // Called from main thread
-  void PushSamples(const short* samples, unsigned int num_samples);
-  void PushStreamingSamples(const short* samples, unsigned int num_samples);
-  void PushWiimoteSpeakerSamples(const short* samples, unsigned int num_samples,
-                                 unsigned int sample_rate_divisor);
-  void PushSkylanderPortalSamples(const u8* samples, unsigned int num_samples);
-  void PushGBASamples(int device_number, const short* samples, unsigned int num_samples);
+  void PushSamples(const s16* samples, u32 num_samples);
+  void PushStreamingSamples(const s16* samples, u32 num_samples);
+  void PushWiimoteSpeakerSamples(const s16* samples, u32 num_samples, u32 sample_rate_divisor);
+  void PushSkylanderPortalSamples(const u8* samples, u32 num_samples);
+  void PushGBASamples(int device_number, const s16* samples, u32 num_samples);

-  unsigned int GetSampleRate() const { return m_sampleRate; }
+  u32 GetSampleRate() const { return m_sampleRate; }

-  void SetDMAInputSampleRateDivisor(unsigned int rate_divisor);
-  void SetStreamInputSampleRateDivisor(unsigned int rate_divisor);
-  void SetGBAInputSampleRateDivisors(int device_number, unsigned int rate_divisor);
+  void SetDMAInputSampleRateDivisor(u32 rate_divisor);
+  void SetStreamInputSampleRateDivisor(u32 rate_divisor);
+  void SetGBAInputSampleRateDivisors(int device_number, u32 rate_divisor);

-  void SetStreamingVolume(unsigned int lvolume, unsigned int rvolume);
-  void SetWiimoteSpeakerVolume(unsigned int lvolume, unsigned int rvolume);
-  void SetGBAVolume(int device_number, unsigned int lvolume, unsigned int rvolume);
+  void SetStreamingVolume(u32 lvolume, u32 rvolume);
+  void SetWiimoteSpeakerVolume(u32 lvolume, u32 rvolume);
+  void SetGBAVolume(int device_number, u32 lvolume, u32 rvolume);

  void StartLogDTKAudio(const std::string& filename);
  void StopLogDTKAudio();
@ -55,43 +55,48 @@ public:

 private:
  static constexpr u32 MAX_SAMPLES = 1024 * 4;  // 128 ms
-  static constexpr u32 INDEX_MASK = MAX_SAMPLES * 2 - 1;
-  static constexpr int MAX_FREQ_SHIFT = 200;  // Per 32000 Hz
-  static constexpr float CONTROL_FACTOR = 0.2f;
-  static constexpr u32 CONTROL_AVG = 32;  // In freq_shift per FIFO size offset
+  static constexpr u32 INDEX_MASK = MAX_SAMPLES - 1;
+  static constexpr double MAX_PITCH_SHIFT = 1.0145453;  // 2 ^ (+1/48): a quarter semitone up
+  static constexpr double MIN_PITCH_SHIFT = 0.9856632;  // 2 ^ (-1/48): a quarter semitone down
+  static constexpr double SAMPLE_RATE_LPF = 1.0 / 4.0;  // How much to smooth out sample rate
+  static constexpr double CONTROL_EFFORT = 1.0 / 64.0;  // Lowers the strength of pitch shifting

-  const unsigned int SURROUND_CHANNELS = 6;
+  const u32 SURROUND_CHANNELS = 6;

  class MixerFifo final
  {
  public:
-    MixerFifo(Mixer* mixer, unsigned sample_rate_divisor, bool little_endian)
+    MixerFifo(Mixer* mixer, u32 sample_rate_divisor, bool little_endian)
        : m_mixer(mixer), m_input_sample_rate_divisor(sample_rate_divisor),
          m_little_endian(little_endian)
    {
    }
    void DoState(PointerWrap& p);
-    void PushSamples(const short* samples, unsigned int num_samples);
-    unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit,
-                     float emulationspeed, int timing_variance);
-    void SetInputSampleRateDivisor(unsigned int rate_divisor);
-    unsigned int GetInputSampleRateDivisor() const;
-    void SetVolume(unsigned int lvolume, unsigned int rvolume);
+    void PushSamples(const s16* samples, u32 num_samples);
+    std::pair<float, float> GetSample(u32 index, float frac, float sinc_ratio);
+    void Mix(s16* samples, u32 num_samples, double target_speed);
+    u32 MixRaw(s16* samples, u32 num_samples);
+    void SetInputSampleRateDivisor(u64 rate_divisor);
+    u64 GetInputSampleRateDivisor() const;
+    void SetVolume(s32 lvolume, s32 rvolume);
    std::pair<s32, s32> GetVolume() const;
-    unsigned int AvailableSamples() const;
+    u32 AvailableFIFOSamples() const;
+    u32 AvailableSamples() const;
+    DT AvailableSamplesTime() const;

  private:
    Mixer* m_mixer;
-    unsigned m_input_sample_rate_divisor;
+    u64 m_input_sample_rate_divisor;
    bool m_little_endian;
-    std::array<short, MAX_SAMPLES * 2> m_buffer{};
+    std::array<std::pair<float, float>, MAX_SAMPLES> m_buffer{};
    std::atomic<u32> m_indexW{0};
    std::atomic<u32> m_indexR{0};
    // Volume ranges from 0-256
    std::atomic<s32> m_LVolume{256};
    std::atomic<s32> m_RVolume{256};
-    float m_numLeftI = 0.0f;
-    u32 m_frac = 0;
+
+    double m_multiplier = 1.0;
+    double m_frac = 0.0;
  };

  void RefreshConfig();
@ -104,12 +109,12 @@ private:
                                        MixerFifo{this, FIXED_SAMPLE_RATE_DIVIDEND / 48000, true},
                                        MixerFifo{this, FIXED_SAMPLE_RATE_DIVIDEND / 48000, true},
                                        MixerFifo{this, FIXED_SAMPLE_RATE_DIVIDEND / 48000, true}};
-  unsigned int m_sampleRate;
+  u32 m_sampleRate;

  bool m_is_stretching = false;
  AudioCommon::AudioStretcher m_stretcher;
  AudioCommon::SurroundDecoder m_surround_decoder;
-  std::array<short, MAX_SAMPLES * 2> m_scratch_buffer{};
+  std::array<s16, MAX_SAMPLES * 2> m_scratch_buffer{};

  WaveFileWriter m_wave_writer_dtk;
  WaveFileWriter m_wave_writer_dsp;
@ -118,8 +123,9 @@ private:
  bool m_log_dsp_audio = false;

  float m_config_emulation_speed;
-  int m_config_timing_variance;
  bool m_config_audio_stretch;
+  int m_config_direct_latency;
+  int m_config_sinc_window_width;

  Config::ConfigChangedCallbackID m_config_changed_callback_id;
 };
--- a/Source/Core/Core/Config/MainSettings.cpp
+++ b/Source/Core/Core/Config/MainSettings.cpp
@ -44,7 +44,6 @@ const Info<bool> MAIN_LARGE_ENTRY_POINTS_MAP{{System::Main, "Core", "LargeEntryP
 const Info<bool> MAIN_ACCURATE_CPU_CACHE{{System::Main, "Core", "AccurateCPUCache"}, false};
 const Info<bool> MAIN_DSP_HLE{{System::Main, "Core", "DSPHLE"}, true};
 const Info<int> MAIN_MAX_FALLBACK{{System::Main, "Core", "MaxFallback"}, 100};
-const Info<int> MAIN_TIMING_VARIANCE{{System::Main, "Core", "TimingVariance"}, 40};
 const Info<bool> MAIN_CPU_THREAD{{System::Main, "Core", "CPUThread"}, true};
 const Info<bool> MAIN_SYNC_ON_SKIP_IDLE{{System::Main, "Core", "SyncOnSkipIdle"}, true};
 const Info<std::string> MAIN_DEFAULT_ISO{{System::Main, "Core", "DefaultISO"}, ""};
@ -57,7 +56,9 @@ const Info<AudioCommon::DPL2Quality> MAIN_DPL2_QUALITY{{System::Main, "Core", "D
                                                       AudioCommon::GetDefaultDPL2Quality()};
 const Info<int> MAIN_AUDIO_LATENCY{{System::Main, "Core", "AudioLatency"}, 20};
 const Info<bool> MAIN_AUDIO_STRETCH{{System::Main, "Core", "AudioStretch"}, false};
+const Info<int> MAIN_AUDIO_DIRECT_LATENCY{{System::Main, "Core", "AudioDirectLatency"}, 20};
 const Info<int> MAIN_AUDIO_STRETCH_LATENCY{{System::Main, "Core", "AudioStretchMaxLatency"}, 80};
+const Info<int> MAIN_AUDIO_SINC_WINDOW_WIDTH{{System::Main, "Core", "AudioSincResampleTaps"}, 6};
 const Info<std::string> MAIN_MEMCARD_A_PATH{{System::Main, "Core", "MemcardAPath"}, ""};
 const Info<std::string> MAIN_MEMCARD_B_PATH{{System::Main, "Core", "MemcardBPath"}, ""};
 const Info<std::string>& GetInfoForMemcardPath(ExpansionInterface::Slot slot)
--- a/Source/Core/Core/Config/MainSettings.h
+++ b/Source/Core/Core/Config/MainSettings.h
@ -62,7 +62,6 @@ extern const Info<bool> MAIN_ACCURATE_CPU_CACHE;
 // Should really be in the DSP section, but we're kind of stuck with bad decisions made in the past.
 extern const Info<bool> MAIN_DSP_HLE;
 extern const Info<int> MAIN_MAX_FALLBACK;
-extern const Info<int> MAIN_TIMING_VARIANCE;
 extern const Info<bool> MAIN_CPU_THREAD;
 extern const Info<bool> MAIN_SYNC_ON_SKIP_IDLE;
 extern const Info<std::string> MAIN_DEFAULT_ISO;
@ -73,7 +72,9 @@ extern const Info<bool> MAIN_DPL2_DECODER;
 extern const Info<AudioCommon::DPL2Quality> MAIN_DPL2_QUALITY;
 extern const Info<int> MAIN_AUDIO_LATENCY;
 extern const Info<bool> MAIN_AUDIO_STRETCH;
+extern const Info<int> MAIN_AUDIO_DIRECT_LATENCY;
 extern const Info<int> MAIN_AUDIO_STRETCH_LATENCY;
+extern const Info<int> MAIN_AUDIO_SINC_WINDOW_WIDTH;
 extern const Info<std::string> MAIN_MEMCARD_A_PATH;
 extern const Info<std::string> MAIN_MEMCARD_B_PATH;
 const Info<std::string>& GetInfoForMemcardPath(ExpansionInterface::Slot slot);
--- a/Source/Core/Core/CoreTiming.cpp
+++ b/Source/Core/Core/CoreTiming.cpp
@ -136,8 +136,6 @@ void CoreTimingManager::RefreshConfig()
  // too long or going full speed in an attempt to catch up to timings.
  m_max_fallback = std::chrono::duration_cast<DT>(DT_ms(Config::Get(Config::MAIN_MAX_FALLBACK)));

-  m_max_variance = std::chrono::duration_cast<DT>(DT_ms(Config::Get(Config::MAIN_TIMING_VARIANCE)));
-
  if (AchievementManager::GetInstance().IsHardcoreModeActive() &&
      Config::Get(Config::MAIN_EMULATION_SPEED) < 1.0f &&
      Config::Get(Config::MAIN_EMULATION_SPEED) > 0.0f)
@ -399,7 +397,7 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
    m_throttle_deadline = min_deadline;
  }

-  const TimePoint vi_deadline = time - std::min(m_max_fallback, m_max_variance) / 2;
+  const TimePoint vi_deadline = time - m_max_fallback;

  // Skip the VI interrupt if the CPU is lagging by a certain amount.
  // It doesn't matter what amount of lag we skip VI at, as long as it's constant.
--- a/Source/Core/Core/CoreTiming.h
+++ b/Source/Core/Core/CoreTiming.h
@ -195,7 +195,6 @@ private:
  bool m_throttle_disable_vi_int = false;

  DT m_max_fallback = {};
-  DT m_max_variance = {};
  double m_emulation_speed = 1.0;

  void ResetThrottle(s64 cycle);
--- a/Source/Core/DolphinQt/Settings/AudioPane.cpp
+++ b/Source/Core/DolphinQt/Settings/AudioPane.cpp
@ -15,6 +15,7 @@
 #include <QSlider>
 #include <QSpacerItem>
 #include <QSpinBox>
+#include <QStackedWidget>
 #include <QVBoxLayout>

 #include "AudioCommon/AudioCommon.h"
@ -137,32 +138,81 @@ void AudioPane::CreateWidgets()
  backend_layout->addRow(dolby_quality_layout);
  backend_layout->addRow(m_dolby_quality_latency_label);

-  auto* stretching_box = new QGroupBox(tr("Audio Stretching Settings"));
-  auto* stretching_layout = new QGridLayout;
-  m_stretching_enable = new QCheckBox(tr("Enable Audio Stretching"));
+  // Set up for new Audio Options Box with ComboBox and StackedWidget
+  m_audio_group = new QGroupBox(tr("Audio Options"));
+  auto* audio_options_layout = new QGridLayout(m_audio_group);
+
+  // Resampling Options
+  m_audio_resampling_box = new QComboBox;
+  m_audio_resampling_box->addItem(tr("Default (12 Samples)"));
+  m_audio_resampling_box->addItem(tr("High (32 Samples)"));
+  m_audio_resampling_box->addItem(tr("Very High (96 Samples)"));
+  m_audio_resampling_box->addItem(tr("Placebo (256 Samples)"));
+  m_audio_resampling_box->setToolTip(tr(
+      "Dolphin must resample audio from ~32000hz to the modern sample rates like 48000hz.<br><br>"
+      "Higher quality options will only result in lower distortion <i>at high frequencies "
+      "(~16000hz)</i>, at the cost of increased CPU usage. <b>The vast majority of people will be "
+      "unable to notice</b>. <br><br>"
+      "<b>WARNING: If your CPU is not fast enough audio will cutout.</b>"));
+  audio_options_layout->addWidget(new QLabel(tr("Resampling Quality:")), 0, 0);
+  audio_options_layout->addWidget(m_audio_resampling_box, 0, 1);
+
+  // Audio Playback Mode
+  m_audio_playback_mode_box = new QComboBox;
+  m_audio_playback_mode_box->addItem(tr("Direct Playback"));
+  m_audio_playback_mode_box->addItem(tr("Audio Stretching"));
+  m_audio_playback_mode_box->setToolTip(
+      tr("<b>Direct Playback:</b> Audio is played unaltered at a fixed latency.<br><br>"
+         "<b>Audio Stretching:</b> Audio is stretched without changing pitch to match emulation "
+         "speed."));
+  audio_options_layout->addWidget(new QLabel(tr("Playback Mode:")), 1, 0);
+  audio_options_layout->addWidget(m_audio_playback_mode_box, 1, 1);
+
+  // Audio Playback Mode Stack
+  m_audio_playback_mode_stack = new QStackedWidget;
+  audio_options_layout->addWidget(m_audio_playback_mode_stack, 2, 0, 1, 2);
+
+  // Direct Playback Settings
+  m_direct_playback_box = new QGroupBox(tr("Direct Playback Settings"));
+  auto* direct_playback_layout = new QHBoxLayout(m_direct_playback_box);
+  m_direct_playback_latency = new QSlider(Qt::Horizontal);
+  m_direct_playback_indicator = new QLabel();
+  m_direct_playback_latency->setRange(8, 64);
+  m_direct_playback_latency->setSingleStep(4);
+  m_direct_playback_latency->setTickInterval(4);
+  m_direct_playback_latency->setTickPosition(QSlider::TicksBelow);
+  direct_playback_layout->addWidget(new QLabel(tr("Buffer:")));
+  direct_playback_layout->addWidget(m_direct_playback_latency);
+  direct_playback_layout->addWidget(m_direct_playback_indicator);
+  m_audio_playback_mode_stack->addWidget(m_direct_playback_box);
+  m_direct_playback_latency->setToolTip(
+      tr("Sets the buffer size in milliseconds for direct playback.<br><br>"
+         "Higher values may hide small lag spikes at the expense of increased latency."));
+
+  // Audio Stretching Settings
+  m_stretching_box = new QGroupBox(tr("Audio Stretching Settings"));
+  auto* stretching_layout = new QHBoxLayout(m_stretching_box);
  m_stretching_buffer_slider = new QSlider(Qt::Horizontal);
  m_stretching_buffer_indicator = new QLabel();
-  m_stretching_buffer_label = new QLabel(tr("Buffer Size:"));
-  stretching_box->setLayout(stretching_layout);
+  m_stretching_buffer_slider->setRange(48, 256);
+  m_stretching_buffer_slider->setSingleStep(8);
+  m_stretching_buffer_slider->setTickInterval(16);
+  m_stretching_buffer_slider->setTickPosition(QSlider::TicksBelow);
+  stretching_layout->addWidget(new QLabel(tr("Buffer:")));
+  stretching_layout->addWidget(m_stretching_buffer_slider);
+  stretching_layout->addWidget(m_stretching_buffer_indicator);
+  m_audio_playback_mode_stack->addWidget(m_stretching_box);

-  m_stretching_buffer_slider->setMinimum(5);
-  m_stretching_buffer_slider->setMaximum(300);
-
-  m_stretching_enable->setToolTip(tr("Enables stretching of the audio to match emulation speed."));
-  m_stretching_buffer_slider->setToolTip(tr("Size of stretch buffer in milliseconds. "
-                                            "Values too low may cause audio crackling."));
-
-  stretching_layout->addWidget(m_stretching_enable, 0, 0, 1, -1);
-  stretching_layout->addWidget(m_stretching_buffer_label, 1, 0);
-  stretching_layout->addWidget(m_stretching_buffer_slider, 1, 1);
-  stretching_layout->addWidget(m_stretching_buffer_indicator, 1, 2);
+  m_stretching_buffer_slider->setToolTip(
+      tr("Sets the buffer size in milliseconds for audio stretching.<br><br>"
+         "Higher values may reduce audio crackling at the expense of increased latency."));

  dsp_box->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Fixed);

  auto* const main_vbox_layout = new QVBoxLayout;
  main_vbox_layout->addWidget(dsp_box);
  main_vbox_layout->addWidget(backend_box);
-  main_vbox_layout->addWidget(stretching_box);
+  main_vbox_layout->addWidget(m_audio_group);

  m_main_layout = new QHBoxLayout;
  m_main_layout->addLayout(main_vbox_layout);
@ -180,14 +230,19 @@ void AudioPane::ConnectWidgets()
  {
    connect(m_latency_spin, &QSpinBox::valueChanged, this, &AudioPane::SaveSettings);
  }
-  connect(m_stretching_buffer_slider, &QSlider::valueChanged, this, &AudioPane::SaveSettings);
+
  connect(m_dolby_pro_logic, &QCheckBox::toggled, this, &AudioPane::SaveSettings);
  connect(m_dolby_quality_slider, &QSlider::valueChanged, this, &AudioPane::SaveSettings);
-  connect(m_stretching_enable, &QCheckBox::toggled, this, &AudioPane::SaveSettings);
  connect(m_dsp_hle, &QRadioButton::toggled, this, &AudioPane::SaveSettings);
  connect(m_dsp_lle, &QRadioButton::toggled, this, &AudioPane::SaveSettings);
  connect(m_dsp_interpreter, &QRadioButton::toggled, this, &AudioPane::SaveSettings);

+  connect(m_audio_resampling_box, &QComboBox::currentIndexChanged, this, &AudioPane::SaveSettings);
+  connect(m_audio_playback_mode_box, &QComboBox::currentIndexChanged, this,
+          &AudioPane::SaveSettings);
+  connect(m_direct_playback_latency, &QSlider::valueChanged, this, &AudioPane::SaveSettings);
+  connect(m_stretching_buffer_slider, &QSlider::valueChanged, this, &AudioPane::SaveSettings);
+
 #ifdef _WIN32
  connect(m_wasapi_device_combo, &QComboBox::currentIndexChanged, this, &AudioPane::SaveSettings);
 #endif
@ -242,13 +297,36 @@ void AudioPane::LoadSettings()
  if (m_latency_control_supported)
    m_latency_spin->setValue(Config::Get(Config::MAIN_AUDIO_LATENCY));

-  // Stretch
-  m_stretching_enable->setChecked(Config::Get(Config::MAIN_AUDIO_STRETCH));
-  m_stretching_buffer_label->setEnabled(m_stretching_enable->isChecked());
+  // Resampling
+  switch (Config::Get(Config::MAIN_AUDIO_SINC_WINDOW_WIDTH))
+  {
+  default:
+  case 6:
+    m_audio_resampling_box->setCurrentIndex(0);
+    break;
+  case 16:
+    m_audio_resampling_box->setCurrentIndex(1);
+    break;
+  case 48:
+    m_audio_resampling_box->setCurrentIndex(2);
+    break;
+  case 128:
+    m_audio_resampling_box->setCurrentIndex(3);
+    break;
+  }
+
+  // Playback Mode
+  int playback_mode = Config::Get(Config::MAIN_AUDIO_STRETCH) ? 1 : 0;
+  m_audio_playback_mode_box->setCurrentIndex(playback_mode);
+  m_audio_playback_mode_stack->setCurrentIndex(playback_mode);
+
+  m_direct_playback_latency->setValue(Config::Get(Config::MAIN_AUDIO_DIRECT_LATENCY));
+  m_direct_playback_indicator->setText(
+      tr("%1 ms").arg(Config::Get(Config::MAIN_AUDIO_DIRECT_LATENCY)));
+
  m_stretching_buffer_slider->setValue(Config::Get(Config::MAIN_AUDIO_STRETCH_LATENCY));
-  m_stretching_buffer_slider->setEnabled(m_stretching_enable->isChecked());
-  m_stretching_buffer_indicator->setEnabled(m_stretching_enable->isChecked());
-  m_stretching_buffer_indicator->setText(tr("%1 ms").arg(m_stretching_buffer_slider->value()));
+  m_stretching_buffer_indicator->setText(
+      tr("%1 ms").arg(Config::Get(Config::MAIN_AUDIO_STRETCH_LATENCY)));

 #ifdef _WIN32
  if (Config::Get(Config::MAIN_WASAPI_DEVICE) == "default")
@ -310,12 +388,34 @@ void AudioPane::SaveSettings()
  if (m_latency_control_supported)
    Config::SetBaseOrCurrent(Config::MAIN_AUDIO_LATENCY, m_latency_spin->value());

+  // Resampling
+  switch (m_audio_resampling_box->currentIndex())
+  {
+  default:
+  case 0:
+    Config::SetBaseOrCurrent(Config::MAIN_AUDIO_SINC_WINDOW_WIDTH, 6);
+    break;
+  case 1:
+    Config::SetBaseOrCurrent(Config::MAIN_AUDIO_SINC_WINDOW_WIDTH, 16);
+    break;
+  case 2:
+    Config::SetBaseOrCurrent(Config::MAIN_AUDIO_SINC_WINDOW_WIDTH, 48);
+    break;
+  case 3:
+    Config::SetBaseOrCurrent(Config::MAIN_AUDIO_SINC_WINDOW_WIDTH, 128);
+    break;
+  }
+
  // Stretch
-  Config::SetBaseOrCurrent(Config::MAIN_AUDIO_STRETCH, m_stretching_enable->isChecked());
+  int playback_mode = m_audio_playback_mode_box->currentIndex();
+  Config::SetBaseOrCurrent(Config::MAIN_AUDIO_STRETCH, playback_mode == 1);
+  m_audio_playback_mode_stack->setCurrentIndex(playback_mode);
+
+  Config::SetBaseOrCurrent(Config::MAIN_AUDIO_DIRECT_LATENCY, m_direct_playback_latency->value());
+  m_direct_playback_indicator->setText(
+      tr("%1 ms").arg(Config::Get(Config::MAIN_AUDIO_DIRECT_LATENCY)));
+
  Config::SetBaseOrCurrent(Config::MAIN_AUDIO_STRETCH_LATENCY, m_stretching_buffer_slider->value());
-  m_stretching_buffer_label->setEnabled(m_stretching_enable->isChecked());
-  m_stretching_buffer_slider->setEnabled(m_stretching_enable->isChecked());
-  m_stretching_buffer_indicator->setEnabled(m_stretching_enable->isChecked());
  m_stretching_buffer_indicator->setText(
      tr("%1 ms").arg(Config::Get(Config::MAIN_AUDIO_STRETCH_LATENCY)));

--- a/Source/Core/DolphinQt/Settings/AudioPane.h
+++ b/Source/Core/DolphinQt/Settings/AudioPane.h
@ -12,11 +12,13 @@ enum class DPL2Quality;

 class QCheckBox;
 class QComboBox;
+class QGroupBox;
 class QHBoxLayout;
 class QLabel;
 class QRadioButton;
 class QSlider;
 class QSpinBox;
+class QStackedWidget;
 class SettingsWindow;

 class AudioPane final : public QWidget
@ -71,9 +73,17 @@ private:
  QComboBox* m_wasapi_device_combo;
 #endif

-  // Audio Stretching
-  QCheckBox* m_stretching_enable;
-  QLabel* m_stretching_buffer_label;
+  // Audio Options
+  QGroupBox* m_audio_group;
+  QComboBox* m_audio_resampling_box;
+  QComboBox* m_audio_playback_mode_box;
+  QStackedWidget* m_audio_playback_mode_stack;
+
+  QGroupBox* m_direct_playback_box;
+  QSlider* m_direct_playback_latency;
+  QLabel* m_direct_playback_indicator;
+
+  QGroupBox* m_stretching_box;
  QSlider* m_stretching_buffer_slider;
  QLabel* m_stretching_buffer_indicator;
 };
--- a/Source/Core/VideoCommon/PerformanceMetrics.cpp
+++ b/Source/Core/VideoCommon/PerformanceMetrics.cpp
@ -19,11 +19,11 @@ void PerformanceMetrics::Reset()
 {
  m_fps_counter.Reset();
  m_vps_counter.Reset();
+  m_audio_speed_counter.Reset();
  m_speed_counter.Reset();
+  m_max_speed_counter.Reset();

-  m_time_sleeping = DT::zero();
-  m_real_times.fill(Clock::now());
-  m_cpu_times.fill(Core::System::GetInstance().GetCoreTiming().GetCPUTimePoint(0));
+  m_prev_adjusted_time = Clock::now() - m_time_sleeping;
 }

 void PerformanceMetrics::CountFrame()
@ -36,6 +36,11 @@ void PerformanceMetrics::CountVBlank()
  m_vps_counter.Count();
 }

+void PerformanceMetrics::CountAudioLatency(DT latency)
+{
+  m_audio_latency_counter.Count(latency, false);
+}
+
 void PerformanceMetrics::CountThrottleSleep(DT sleep)
 {
  std::unique_lock lock(m_time_lock);
@ -45,11 +50,13 @@ void PerformanceMetrics::CountThrottleSleep(DT sleep)
 void PerformanceMetrics::CountPerformanceMarker(Core::System& system, s64 cyclesLate)
 {
  std::unique_lock lock(m_time_lock);
+  const TimePoint adjusted_time = Clock::now() - m_time_sleeping;
+
+  m_audio_speed_counter.Count();
  m_speed_counter.Count();

-  m_real_times[m_time_index] = Clock::now() - m_time_sleeping;
-  m_cpu_times[m_time_index] = system.GetCoreTiming().GetCPUTimePoint(cyclesLate);
-  m_time_index += 1;
+  m_max_speed_counter.Count(adjusted_time - m_prev_adjusted_time);
+  m_prev_adjusted_time = adjusted_time;
 }

 double PerformanceMetrics::GetFPS() const
@ -67,11 +74,14 @@ double PerformanceMetrics::GetSpeed() const
  return m_speed_counter.GetHzAvg() / 100.0;
 }

+double PerformanceMetrics::GetAudioSpeed() const
+{
+  return m_audio_speed_counter.GetHzAvg() / 100.0;
+}
+
 double PerformanceMetrics::GetMaxSpeed() const
 {
-  std::shared_lock lock(m_time_lock);
-  return DT_s(m_cpu_times[u8(m_time_index - 1)] - m_cpu_times[m_time_index]) /
-         DT_s(m_real_times[u8(m_time_index - 1)] - m_real_times[m_time_index]);
+  return m_max_speed_counter.GetHzAvg() / 100.0;
 }

 double PerformanceMetrics::GetLastSpeedDenominator() const
@ -145,9 +155,13 @@ void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)
                                                                   1000.0,
                                                                   2000.0};

-      const DT vblank_time = m_vps_counter.GetDtAvg() + 2 * m_vps_counter.GetDtStd();
-      const DT frame_time = m_fps_counter.GetDtAvg() + 2 * m_fps_counter.GetDtStd();
-      const double target_max_time = DT_ms(vblank_time + frame_time).count();
+      const double vblank_time =
+          DT_ms(m_vps_counter.GetDtAvg() + 2 * m_vps_counter.GetDtStd()).count();
+      const double frame_time =
+          DT_ms(m_fps_counter.GetDtAvg() + 2 * m_fps_counter.GetDtStd()).count();
+      const double audio_latency = DT_ms(m_audio_latency_counter.GetDtAvg()).count();
+
+      const double target_max_time = 2.0 * std::max({vblank_time, frame_time, audio_latency});
      const double a =
          std::max(0.0, 1.0 - std::exp(-4.0 * (DT_s(m_fps_counter.GetLastRawDt()) /
                                               DT_s(m_fps_counter.GetSampleWindow()))));
@ -176,6 +190,7 @@ void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)
        ImPlot::SetupAxisTicks(ImAxis_Y1, tick_marks.data(), num_ticks);
        ImPlot::SetupAxesLimits(0, total_frame_time, 0, m_graph_max_time, ImGuiCond_Always);
        ImPlot::SetupLegend(ImPlotLocation_SouthEast, ImPlotLegendFlags_None);
+        m_audio_latency_counter.ImPlotPlotLines("Audio Latency (ms)");
        m_vps_counter.ImPlotPlotLines("V-Blank (ms)");
        m_fps_counter.ImPlotPlotLines("Frame (ms)");
        ImPlot::EndPlot();
--- a/Source/Core/VideoCommon/PerformanceMetrics.h
+++ b/Source/Core/VideoCommon/PerformanceMetrics.h
@ -30,6 +30,7 @@ public:
  void CountFrame();
  void CountVBlank();

+  void CountAudioLatency(DT latency);
  void CountThrottleSleep(DT sleep);
  void CountPerformanceMarker(Core::System& system, s64 cyclesLate);

@ -37,6 +38,7 @@ public:
  double GetFPS() const;
  double GetVPS() const;
  double GetSpeed() const;
+  double GetAudioSpeed() const;
  double GetMaxSpeed() const;

  double GetLastSpeedDenominator() const;
@ -47,15 +49,17 @@ public:
 private:
  PerformanceTracker m_fps_counter{"render_times.txt"};
  PerformanceTracker m_vps_counter{"vblank_times.txt"};
-  PerformanceTracker m_speed_counter{std::nullopt, 1000000};
+
+  PerformanceTracker m_audio_latency_counter{};
+  PerformanceTracker m_audio_speed_counter{std::nullopt, 128000};
+
+  PerformanceTracker m_speed_counter{std::nullopt, 1280000};
+  PerformanceTracker m_max_speed_counter{std::nullopt, 1280000};

  double m_graph_max_time = 0.0;

  mutable std::shared_mutex m_time_lock;
-
-  u8 m_time_index = 0;
-  std::array<TimePoint, 256> m_real_times{};
-  std::array<TimePoint, 256> m_cpu_times{};
+  TimePoint m_prev_adjusted_time{};
  DT m_time_sleeping{};
 };

--- a/Source/Core/VideoCommon/PerformanceTracker.cpp
+++ b/Source/Core/VideoCommon/PerformanceTracker.cpp
@ -47,7 +47,7 @@ void PerformanceTracker::Reset()
  m_dt_std = std::nullopt;
 }

-void PerformanceTracker::Count()
+void PerformanceTracker::Count(std::optional<DT> custom_value, bool value_is_duration)
 {
  std::unique_lock lock{m_mutex};

@ -57,26 +57,31 @@ void PerformanceTracker::Count()
  const DT window{GetSampleWindow()};

  const TimePoint time{Clock::now()};
-  const DT diff{time - m_last_time};
-
+  const DT duration{time - m_last_time};
+  const DT value{custom_value.value_or(duration)};
+  const TimeDataPair data_point{value_is_duration ? value : duration, value};
  m_last_time = time;

-  QueuePush(diff);
-  m_dt_total += diff;
+  QueuePush(data_point);
+  m_dt_total += data_point;

  if (m_dt_queue_begin == m_dt_queue_end)
    m_dt_total -= QueuePop();

-  while (window <= m_dt_total - QueueTop())
+  while (window <= m_dt_total.duration - QueueTop().duration)
    m_dt_total -= QueuePop();

  // Simple Moving Average Throughout the Window
-  m_dt_avg = m_dt_total / QueueSize();
-  const double hz = DT_s(1.0) / m_dt_avg;
+  // The average is taken over the recorded values, not over the durations
+  m_dt_avg = m_dt_total.value / QueueSize();
+
+  // Even though the frequency does not make sense if the value
+  // is not the duration, it is still useful to have the value
+  const double hz = DT_s(QueueSize()) / m_dt_total.value;

  // Exponential Moving Average
-  const DT_s rc = SAMPLE_RC_RATIO * std::min(window, m_dt_total);
-  const double a = 1.0 - std::exp(-(DT_s(diff) / rc));
+  const DT_s rc = SAMPLE_RC_RATIO * window;
+  const double a = 1.0 - std::exp(-(DT_s(data_point.duration) / rc));

  // Sometimes euler averages can break when the average is inf/nan
  if (std::isfinite(m_hz_avg))
@ -86,7 +91,7 @@ void PerformanceTracker::Count()

  m_dt_std = std::nullopt;

-  LogRenderTimeToFile(diff);
+  LogRenderTimeToFile(data_point.value);
 }

 DT PerformanceTracker::GetSampleWindow() const
@ -121,7 +126,7 @@ DT PerformanceTracker::GetDtStd() const
  double total = 0.0;
  for (std::size_t i = m_dt_queue_begin; i != m_dt_queue_end; i = IncrementIndex(i))
  {
-    double diff = DT_s(m_dt_queue[i] - m_dt_avg).count();
+    double diff = DT_s(m_dt_queue[i].value - m_dt_avg).count();
    total += diff * diff;
  }

@ -136,7 +141,7 @@ DT PerformanceTracker::GetLastRawDt() const
  if (QueueEmpty())
    return DT::zero();

-  return QueueBottom();
+  return QueueBottom().value;
 }

 void PerformanceTracker::ImPlotPlotLines(const char* label) const
@ -152,35 +157,32 @@ void PerformanceTracker::ImPlotPlotLines(const char* label) const
  const bool quality = QueueSize() < MAX_QUALITY_GRAPH_SIZE;

  const DT update_time = Clock::now() - m_last_time;
-  const float predicted_frame_time = DT_ms(std::max(update_time, QueueBottom())).count();

  std::size_t points = 0;
-  if (quality)
-  {
-    x[points] = 0.f;
-    y[points] = predicted_frame_time;
-    ++points;
-  }
+  x[points] = 0.f;
+  y[points] = DT_ms(QueueBottom().value).count();
+  ++points;

  x[points] = DT_ms(update_time).count();
-  y[points] = predicted_frame_time;
+  y[points] = y[points - 1];
  ++points;

  const std::size_t begin = DecrementIndex(m_dt_queue_end);
  const std::size_t end = DecrementIndex(m_dt_queue_begin);
  for (std::size_t i = begin; i != end; i = DecrementIndex(i))
  {
-    const float frame_time_ms = DT_ms(m_dt_queue[i]).count();
+    const float frame_duration_ms = DT_ms(m_dt_queue[i].duration).count();
+    const float frame_value_ms = DT_ms(m_dt_queue[i].value).count();

    if (quality)
    {
      x[points] = x[points - 1];
-      y[points] = frame_time_ms;
+      y[points] = frame_value_ms;
      ++points;
    }

-    x[points] = x[points - 1] + frame_time_ms;
-    y[points] = frame_time_ms;
+    x[points] = x[points - 1] + frame_duration_ms;
+    y[points] = frame_value_ms;
    ++points;
  }

@ -194,25 +196,25 @@ void PerformanceTracker::QueueClear()
  m_dt_queue_end = 0;
 }

-void PerformanceTracker::QueuePush(DT dt)
+void PerformanceTracker::QueuePush(TimeDataPair dt)
 {
  m_dt_queue[m_dt_queue_end] = dt;
  m_dt_queue_end = IncrementIndex(m_dt_queue_end);
 }

-const DT& PerformanceTracker::QueuePop()
+const PerformanceTracker::TimeDataPair& PerformanceTracker::QueuePop()
 {
  const std::size_t top = m_dt_queue_begin;
  m_dt_queue_begin = IncrementIndex(m_dt_queue_begin);
  return m_dt_queue[top];
 }

-const DT& PerformanceTracker::QueueTop() const
+const PerformanceTracker::TimeDataPair& PerformanceTracker::QueueTop() const
 {
  return m_dt_queue[m_dt_queue_begin];
 }

-const DT& PerformanceTracker::QueueBottom() const
+const PerformanceTracker::TimeDataPair& PerformanceTracker::QueueBottom() const
 {
  return m_dt_queue[DecrementIndex(m_dt_queue_end)];
 }
--- a/Source/Core/VideoCommon/PerformanceTracker.h
+++ b/Source/Core/VideoCommon/PerformanceTracker.h
@ -15,7 +15,7 @@ class PerformanceTracker
 {
 private:
  // Must be powers of 2 for masking to work
-  static constexpr u64 MAX_DT_QUEUE_SIZE = 1UL << 12;
+  static constexpr u64 MAX_DT_QUEUE_SIZE = 1UL << 13;
  static constexpr u64 MAX_QUALITY_GRAPH_SIZE = 1UL << 8;

  static inline std::size_t IncrementIndex(const std::size_t index)
@ -33,6 +33,31 @@ private:
    return (end - begin) & (MAX_DT_QUEUE_SIZE - 1);
  }

+  struct TimeDataPair
+  {
+  public:
+    TimeDataPair(DT duration_dt, DT value_dt) : duration{duration_dt}, value{value_dt} {}
+    TimeDataPair(DT duration_dt) : TimeDataPair{duration_dt, duration_dt} {}
+    TimeDataPair() : TimeDataPair{DT::zero()} {}
+
+    TimeDataPair& operator+=(const TimeDataPair& other)
+    {
+      duration += other.duration;
+      value += other.value;
+      return *this;
+    }
+
+    TimeDataPair& operator-=(const TimeDataPair& other)
+    {
+      duration -= other.duration;
+      value -= other.value;
+      return *this;
+    }
+
+  public:
+    DT duration, value;
+  };
+
 public:
  PerformanceTracker(const std::optional<std::string> log_name = std::nullopt,
                     const std::optional<s64> sample_window_us = std::nullopt);
@ -45,7 +70,20 @@ public:

  // Functions for recording performance information
  void Reset();
-  void Count();
+
+  /**
+   * custom_value can be used if you are recording something with its own DT. For example,
+   * if you are recording the fallback of the throttler or the latency of the frame.
+   *
+   * If a custom_value is not supplied, the value will be set to the time between calls, i.e.
+   * the duration. This is the most common use case of this class: an FPS counter.
+   *
+   * The boolean value_is_duration should be set to true if the custom DTs you are providing
+   * represent a continuous duration. For example, the present times from a render backend
+   * would set value_is_duration to true. Things like throttler fallback or frame latency are
+   * not continuous, so they should pass false; the time between calls is then the duration.
+   */
+  void Count(std::optional<DT> custom_value = std::nullopt, bool value_is_duration = false);

  // Functions for reading performance information
  DT GetSampleWindow() const;
@ -61,13 +99,13 @@ public:

 private:  // Functions for managing dt queue
  inline void QueueClear();
-  inline void QueuePush(DT dt);
-  inline const DT& QueuePop();
-  inline const DT& QueueTop() const;
-  inline const DT& QueueBottom() const;
+  inline void QueuePush(TimeDataPair dt);
+  inline const TimeDataPair& QueuePop();
+  inline const TimeDataPair& QueueTop() const;
+  inline const TimeDataPair& QueueBottom() const;

-  std::size_t inline QueueSize() const;
-  bool inline QueueEmpty() const;
+  inline std::size_t QueueSize() const;
+  inline bool QueueEmpty() const;

  // Handle pausing and logging
  void LogRenderTimeToFile(DT val);
@ -87,8 +125,8 @@ private:  // Functions for managing dt queue
  const std::optional<s64> m_sample_window_us;

  // Queue + Running Total used to calculate average dt
-  DT m_dt_total = DT::zero();
-  std::array<DT, MAX_DT_QUEUE_SIZE> m_dt_queue;
+  TimeDataPair m_dt_total;
+  std::array<TimeDataPair, MAX_DT_QUEUE_SIZE> m_dt_queue;
  std::size_t m_dt_queue_begin = 0;
  std::size_t m_dt_queue_end = 0;