Update to v098r07 release.

byuu says:

Changelog:
- GB: support modeSelect and RAM for MBC1M (Momotarou Collection)
- audio: implemented native resampling support into Emulator::Stream
- audio: removed nall::DSP completely

Unfortunately, the new resampler didn't turn out quite as fast as I had hoped. The final hermite resampling added some overhead; and I had to bump up the kernel count to 500 from 400 to get the buzzing to go away on my main PC. I think that's due to it running at 48000Hz output instead of 44100Hz output, maybe?

Compared to Ryphecha's:
- (NES) Mega Man 2: 167fps -> 166fps
- (GB) Mega Man II: 224fps -> 200fps
- (WSC) Riviera: 143fps -> 151fps

Odd that the WS/WSC ends up faster while the DMG/CGB ends up slower.

But this knocks 922 lines down to 146 lines. The only files left in all of higan not written (or rewritten) by me are ruby/xaudio2.h and libco/ppc.c
2016-04-23 17:55:59 +10:00 · 2016-04-23 17:55:59 +10:00 · 7cdae5195a
parent e2ee6689a0
commit 7cdae5195a
26 changed files with 248 additions and 1293 deletions
--- a/higan/audio/audio.cpp
+++ b/higan/audio/audio.cpp
@ -2,26 +2,9 @@

 namespace Emulator {

+#include "stream.cpp"
 Audio audio;

-Stream::Stream(double inputFrequency, double outputFrequency, double volume, double balance) {
-  dsp.setChannels(2);
-  dsp.setPrecision(16);
-  dsp.setFrequency(inputFrequency);
-  dsp.setResampler(DSP::ResampleEngine::Sinc);
-  dsp.setResamplerFrequency(outputFrequency);
-  dsp.setVolume(volume);
-  dsp.setBalance(balance);
-}
-
-auto Stream::sample(int16 left, int16 right) -> void {
-  int samples[] = {left, right};
-  dsp.sample(samples);
-  audio.poll();
-}
-
-//
-
 auto Audio::reset() -> void {
  streams.reset();
  setReverbDelay(reverbDelay);
@ -33,17 +16,15 @@ auto Audio::setInterface(Interface* interface) -> void {

 auto Audio::setFrequency(double frequency) -> void {
  this->frequency = frequency;
-  for(auto& stream : streams) stream->dsp.setResamplerFrequency(frequency);
+  for(auto& stream : streams) stream->setFrequency(frequency);
 }

 auto Audio::setVolume(double volume) -> void {
  this->volume = volume;
-  for(auto& stream : streams) stream->dsp.setVolume(volume);
 }

 auto Audio::setBalance(double balance) -> void {
  this->balance = balance;
-  for(auto& stream : streams) stream->dsp.setBalance(balance);
 }

 auto Audio::setReverbDelay(uint reverbDelay) -> void {
@ -58,8 +39,9 @@ auto Audio::setReverbLevel(double reverbLevel) -> void {
  this->reverbLevel = reverbLevel;
 }

-auto Audio::createStream(double frequency) -> shared_pointer<Stream> {
-  shared_pointer<Stream> stream = new Stream{frequency, this->frequency, volume, balance};
+auto Audio::createStream(uint channels, double frequency) -> shared_pointer<Stream> {
+  shared_pointer<Stream> stream = new Stream{channels, frequency};
+  stream->setFrequency(this->frequency);
  streams.append(stream);
  return stream;
 }
@ -68,25 +50,37 @@ auto Audio::createStream(double frequency) -> shared_pointer<Stream> {
 auto Audio::poll() -> void {
  while(true) {
    for(auto& stream : streams) {
-      if(!stream->dsp.pending()) return;
+      if(!stream->pending()) return;
    }

-    int left = 0, right = 0;
+    double left = 0.0, right = 0.0;
    for(auto& stream : streams) {
-      int samples[2];
-      stream->dsp.read(samples);
-      left += samples[0];
+      double samples[2];
+      stream->read(samples);
+      left  += samples[0];
      right += samples[1];
    }
+    left  /= streams.size();
+    right /= streams.size();
+
+    if(balance < 0.0) right *= 1.0 + balance;
+    if(balance > 0.0) left  *= 1.0 - balance;
+
+    //todo: apply volume, reverb before denormalization?
+    int ileft  = (left  * 65535.0) - 32768.0;
+    int iright = (right * 65535.0) - 32768.0;
+
+    ileft  *= volume;
+    iright *= volume;

    if(reverbDelay) {
-      reverbLeft.append(left);
-      reverbRight.append(right);
-      left += reverbLeft.takeFirst() * reverbLevel;
-      right += reverbRight.takeFirst() * reverbLevel;
+      reverbLeft.append(ileft);
+      reverbRight.append(iright);
+      ileft  += reverbLeft.takeFirst()  * reverbLevel;
+      iright += reverbRight.takeFirst() * reverbLevel;
    }

-    interface->audioSample(sclamp<16>(left), sclamp<16>(right));
+    interface->audioSample(sclamp<16>(ileft), sclamp<16>(iright));
  }
 }

--- a/higan/audio/audio.hpp
+++ b/higan/audio/audio.hpp
@ -1,20 +1,10 @@
 #pragma once

-#include "core.hpp"
-
 namespace Emulator {

 struct Interface;
-
-struct Stream {
-  Stream(double inputFrequency, double outputFrequency, double volume, double balance);
-  auto sample(int16 left, int16 right) -> void;
-
-private:
-  nall::DSP dsp;
-
-  friend class Audio;
-};
+struct Audio;
+struct Stream;

 struct Audio {
  auto reset() -> void;
@ -26,7 +16,7 @@ struct Audio {
  auto setReverbDelay(uint milliseconds) -> void;
  auto setReverbLevel(double level) -> void;

-  auto createStream(double frequency) -> shared_pointer<Stream>;
+  auto createStream(uint channels, double frequency) -> shared_pointer<Stream>;

  auto poll() -> void;

@ -44,6 +34,50 @@ private:
  friend class Stream;
 };

+struct Stream {
+  Stream(uint channels, double inputFrequency);
+  ~Stream();
+
+  auto reset() -> void;
+  auto setFrequency(double outputFrequency) -> void;
+
+  auto pending() const -> bool;
+  auto read(double* samples) -> void;
+  auto write(int16* samples) -> void;
+
+  template<typename... P> auto sample(P&&... p) -> void {
+    int16 samples[sizeof...(P)] = {forward<P>(p)...};
+    write(samples);
+  }
+
+private:
+  const uint channels;
+  const double inputFrequency;
+  double outputFrequency = 0.0;
+  double cutoffFrequency = 0.0;
+
+  double* tap = nullptr;
+  uint taps = 0;
+
+  uint decimationRate = 0;
+  uint decimationOffset = 0;
+
+  double** input = nullptr;
+  uint inputOffset = 0;
+
+  double resamplerFrequency = 0.0;
+  double resamplerFraction = 0.0;
+  double resamplerStep = 0.0;
+  double** queue = nullptr;
+
+  double** output = nullptr;
+  uint outputs = 0;
+  uint outputReadOffset = 0;
+  uint outputWriteOffset = 0;
+
+  friend class Audio;
+};
+
 extern Audio audio;

 }
--- a/higan/audio/buffer.hpp
+++ b/higan/audio/buffer.hpp
@ -1,50 +0,0 @@
-#pragma once
-
-struct Buffer {
-  Buffer() {
-  }
-
-  ~Buffer() {
-    setChannels(0);
-  }
-
-  auto setChannels(uint channels) -> void {
-    if(sample) {
-      for(auto c : range(this->channels)) {
-        if(sample[c]) delete[] sample[c];
-      }
-      delete[] sample;
-    }
-
-    this->channels = channels;
-    if(channels == 0) return;
-
-    sample = new double*[channels];
-    for(auto c : range(channels)) {
-      sample[c] = new double[65536]();
-    }
-  }
-
-  inline auto read(uint channel, int offset = 0) -> double& {
-    return sample[channel][(uint16_t)(rdoffset + offset)];
-  }
-
-  inline auto write(uint channel, int offset = 0) -> double& {
-    return sample[channel][(uint16_t)(wroffset + offset)];
-  }
-
-  inline auto clear() -> void {
-    for(auto c : range(channels)) {
-      for(auto n : range(65536)) {
-        sample[c][n] = 0;
-      }
-    }
-    rdoffset = 0;
-    wroffset = 0;
-  }
-
-  double** sample = nullptr;
-  uint16_t rdoffset = 0;
-  uint16_t wroffset = 0;
-  uint channels = 0;
-};
--- a/higan/audio/core.hpp
+++ b/higan/audio/core.hpp
@ -1,164 +0,0 @@
-#pragma once
-
-#include <math.h>
-#include <vector>
-#include <nall/stdint.hpp>
-
-namespace nall {
-
-struct DSP;
-
-struct Resampler {
-  Resampler(DSP& dsp) : dsp(dsp) {}
-  virtual ~Resampler() {}
-
-  virtual auto setFrequency() -> void = 0;
-  virtual auto clear() -> void = 0;
-  virtual auto sample() -> void = 0;
-
-  DSP& dsp;
-  double frequency = 44100.0;
-};
-
-struct DSP {
-  enum class ResampleEngine : uint {
-    Nearest,
-    Linear,
-    Cosine,
-    Cubic,
-    Hermite,
-    Average,
-    Sinc,
-  };
-
-  inline DSP();
-  inline ~DSP();
-
-  inline auto setChannels(uint channels) -> void;
-  inline auto setPrecision(uint precision) -> void;
-  inline auto setFrequency(double frequency) -> void;  //inputFrequency
-  inline auto setVolume(double volume) -> void;
-  inline auto setBalance(double balance) -> void;
-
-  inline auto setResampler(ResampleEngine resamplingEngine) -> void;
-  inline auto setResamplerFrequency(double frequency) -> void;  //outputFrequency
-
-  inline auto sample(int channel[]) -> void;
-  inline auto pending() const -> bool;
-  inline auto read(int channel[]) -> void;
-
-  inline auto clear() -> void;
-
-protected:
-  inline auto write(double channel[]) -> void;
-  inline auto adjustVolume() -> void;
-  inline auto adjustBalance() -> void;
-  inline auto clamp(const uint bits, const int input) -> int;
-
-  struct Settings {
-    uint channels;
-    uint precision;
-    double frequency;
-    double volume;
-    double balance;
-
-    //internal
-    double intensity;
-    double intensityInverse;
-  } settings;
-
-  Resampler* resampler = nullptr;
-
-  #include "buffer.hpp"
-  Buffer buffer;
-  Buffer output;
-
-  friend class ResampleNearest;
-  friend class ResampleLinear;
-  friend class ResampleCosine;
-  friend class ResampleCubic;
-  friend class ResampleAverage;
-  friend class ResampleHermite;
-  friend class ResampleSinc;
-};
-
-#include "resample/nearest.hpp"
-#include "resample/linear.hpp"
-#include "resample/cosine.hpp"
-#include "resample/cubic.hpp"
-#include "resample/hermite.hpp"
-#include "resample/average.hpp"
-#include "resample/sinc.hpp"
-#include "settings.hpp"
-
-DSP::DSP() {
-  setResampler(ResampleEngine::Hermite);
-  setResamplerFrequency(44100.0);
-
-  setChannels(2);
-  setPrecision(16);
-  setFrequency(44100.0);
-  setVolume(1.0);
-  setBalance(0.0);
-
-  clear();
-}
-
-DSP::~DSP() {
-  if(resampler) delete resampler;
-}
-
-auto DSP::sample(int channel[]) -> void {
-  for(auto c : range(settings.channels)) {
-    buffer.write(c) = (double)channel[c] * settings.intensityInverse;
-  }
-  buffer.wroffset++;
-  resampler->sample();
-}
-
-auto DSP::pending() const -> bool {
-  return output.rdoffset != output.wroffset;
-}
-
-auto DSP::read(int channel[]) -> void {
-  adjustVolume();
-  adjustBalance();
-
-  for(auto c : range(settings.channels)) {
-    channel[c] = clamp(settings.precision, output.read(c) * settings.intensity);
-  }
-  output.rdoffset++;
-}
-
-auto DSP::write(double channel[]) -> void {
-  for(auto c : range(settings.channels)) {
-    output.write(c) = channel[c];
-  }
-  output.wroffset++;
-}
-
-auto DSP::adjustVolume() -> void {
-  for(auto c : range(settings.channels)) {
-    output.read(c) *= settings.volume;
-  }
-}
-
-auto DSP::adjustBalance() -> void {
-  if(settings.channels != 2) return;  //TODO: support > 2 channels
-  if(settings.balance < 0.0) output.read(1) *= 1.0 + settings.balance;
-  if(settings.balance > 0.0) output.read(0) *= 1.0 - settings.balance;
-}
-
-auto DSP::clamp(const uint bits, const int x) -> int {
-  const int b = 1U << (bits - 1);
-  const int m = (1U << (bits - 1)) - 1;
-  return (x > m) ? m : (x < -b) ? -b : x;
-}
-
-auto DSP::clear() -> void {
-  buffer.clear();
-  output.clear();
-  resampler->clear();
-}
-
-}
--- a/higan/audio/resample/average.hpp
+++ b/higan/audio/resample/average.hpp
@ -1,72 +0,0 @@
-#pragma once
-
-struct ResampleAverage : Resampler {
-  ResampleAverage(DSP& dsp) : Resampler(dsp) {}
-
-  inline auto setFrequency() -> void;
-  inline auto clear() -> void;
-  inline auto sample() -> void;
-  inline auto sampleLinear() -> void;
-
-private:
-  double fraction;
-  double step;
-};
-
-auto ResampleAverage::setFrequency() -> void {
-  fraction = 0.0;
-  step = dsp.settings.frequency / frequency;
-}
-
-auto ResampleAverage::clear() -> void {
-  fraction = 0.0;
-}
-
-auto ResampleAverage::sample() -> void {
-  //can only average if input frequency >= output frequency
-  if(step < 1.0) return sampleLinear();
-
-  fraction += 1.0;
-
-  double scalar = 1.0;
-  if(fraction > step) scalar = 1.0 - (fraction - step);
-
-  for(auto c : range(dsp.settings.channels)) {
-    dsp.output.write(c) += dsp.buffer.read(c) * scalar;
-  }
-
-  if(fraction >= step) {
-    for(auto c : range(dsp.settings.channels)) {
-      dsp.output.write(c) /= step;
-    }
-    dsp.output.wroffset++;
-
-    fraction -= step;
-    for(auto c : range(dsp.settings.channels)) {
-      dsp.output.write(c) = dsp.buffer.read(c) * fraction;
-    }
-  }
-
-  dsp.buffer.rdoffset++;
-}
-
-auto ResampleAverage::sampleLinear() -> void {
-  while(fraction <= 1.0) {
-    double channel[dsp.settings.channels];
-
-    for(auto n : range(dsp.settings.channels)) {
-      double a = dsp.buffer.read(n, -1);
-      double b = dsp.buffer.read(n, -0);
-
-      double mu = fraction;
-
-      channel[n] = a * (1.0 - mu) + b * mu;
-    }
-
-    dsp.write(channel);
-    fraction += step;
-  }
-
-  dsp.buffer.rdoffset++;
-  fraction -= 1.0;
-}
--- a/higan/audio/resample/cosine.hpp
+++ b/higan/audio/resample/cosine.hpp
@ -1,44 +0,0 @@
-#pragma once
-
-struct ResampleCosine : Resampler {
-  ResampleCosine(DSP& dsp) : Resampler(dsp) {}
-
-  inline auto setFrequency() -> void;
-  inline auto clear() -> void;
-  inline auto sample() -> void;
-
-private:
-  double fraction;
-  double step;
-};
-
-auto ResampleCosine::setFrequency() -> void {
-  fraction = 0.0;
-  step = dsp.settings.frequency / frequency;
-}
-
-auto ResampleCosine::clear() -> void {
-  fraction = 0.0;
-}
-
-auto ResampleCosine::sample() -> void {
-  while(fraction <= 1.0) {
-    double channel[dsp.settings.channels];
-
-    for(auto n : range(dsp.settings.channels)) {
-      double a = dsp.buffer.read(n, -1);
-      double b = dsp.buffer.read(n, -0);
-
-      double mu = fraction;
-      mu = (1.0 - cos(mu * 3.14159265)) / 2.0;
-
-      channel[n] = a * (1.0 - mu) + b * mu;
-    }
-
-    dsp.write(channel);
-    fraction += step;
-  }
-
-  dsp.buffer.rdoffset++;
-  fraction -= 1.0;
-}
--- a/higan/audio/resample/cubic.hpp
+++ b/higan/audio/resample/cubic.hpp
@ -1,50 +0,0 @@
-#pragma once
-
-struct ResampleCubic : Resampler {
-  ResampleCubic(DSP& dsp) : Resampler(dsp) {}
-
-  inline auto setFrequency() -> void;
-  inline auto clear() -> void;
-  inline auto sample() -> void;
-
-private:
-  double fraction;
-  double step;
-};
-
-auto ResampleCubic::setFrequency() -> void {
-  fraction = 0.0;
-  step = dsp.settings.frequency / frequency;
-}
-
-auto ResampleCubic::clear() -> void {
-  fraction = 0.0;
-}
-
-auto ResampleCubic::sample() -> void {
-  while(fraction <= 1.0) {
-    double channel[dsp.settings.channels];
-
-    for(auto n : range(dsp.settings.channels)) {
-      double a = dsp.buffer.read(n, -3);
-      double b = dsp.buffer.read(n, -2);
-      double c = dsp.buffer.read(n, -1);
-      double d = dsp.buffer.read(n, -0);
-
-      double mu = fraction;
-
-      double A = d - c - a + b;
-      double B = a - b - A;
-      double C = c - a;
-      double D = b;
-
-      channel[n] = A * (mu * 3) + B * (mu * 2) + C * mu + D;
-    }
-
-    dsp.write(channel);
-    fraction += step;
-  }
-
-  dsp.buffer.rdoffset++;
-  fraction -= 1.0;
-}
--- a/higan/audio/resample/hermite.hpp
+++ b/higan/audio/resample/hermite.hpp
@ -1,62 +0,0 @@
-#pragma once
-
-struct ResampleHermite : Resampler {
-  ResampleHermite(DSP& dsp) : Resampler(dsp) {}
-
-  inline auto setFrequency() -> void;
-  inline auto clear() -> void;
-  inline auto sample() -> void;
-
-private:
-  double fraction;
-  double step;
-};
-
-auto ResampleHermite::setFrequency() -> void {
-  fraction = 0.0;
-  step = dsp.settings.frequency / frequency;
-}
-
-auto ResampleHermite::clear() -> void {
-  fraction = 0.0;
-}
-
-auto ResampleHermite::sample() -> void {
-  while(fraction <= 1.0) {
-    double channel[dsp.settings.channels];
-
-    for(auto n : range(dsp.settings.channels)) {
-      double a = dsp.buffer.read(n, -3);
-      double b = dsp.buffer.read(n, -2);
-      double c = dsp.buffer.read(n, -1);
-      double d = dsp.buffer.read(n, -0);
-
-      const double tension = 0.0;  //-1 = low, 0 = normal, +1 = high
-      const double bias = 0.0;  //-1 = left, 0 = even, +1 = right
-
-      double mu1, mu2, mu3, m0, m1, a0, a1, a2, a3;
-
-      mu1 = fraction;
-      mu2 = mu1 * mu1;
-      mu3 = mu2 * mu1;
-
-      m0  = (b - a) * (1.0 + bias) * (1.0 - tension) / 2.0;
-      m0 += (c - b) * (1.0 - bias) * (1.0 - tension) / 2.0;
-      m1  = (c - b) * (1.0 + bias) * (1.0 - tension) / 2.0;
-      m1 += (d - c) * (1.0 - bias) * (1.0 - tension) / 2.0;
-
-      a0 = +2 * mu3 - 3 * mu2 + 1;
-      a1 =      mu3 - 2 * mu2 + mu1;
-      a2 =      mu3 -     mu2;
-      a3 = -2 * mu3 + 3 * mu2;
-
-      channel[n] = (a0 * b) + (a1 * m0) + (a2 * m1) + (a3 * c);
-    }
-
-    dsp.write(channel);
-    fraction += step;
-  }
-
-  dsp.buffer.rdoffset++;
-  fraction -= 1.0;
-}
--- a/higan/audio/resample/lib/sinc.hpp
+++ b/higan/audio/resample/lib/sinc.hpp
@ -1,600 +0,0 @@
-// If these types are changed to anything other than "float", you should comment out the SSE detection directives below
-// so that the SSE code is not used.
-
-typedef float resample_coeff_t;	// note: sizeof(resample_coeff_t) must be == to a power of 2, and not larger than 16
-typedef float resample_samp_t;
-
-
-// ...but don't comment this single RESAMPLE_SSEREGPARM define out when disabling SSE.
-#define RESAMPLE_SSEREGPARM	
-
-#if defined(__SSE__)
-  #define SINCRESAMPLE_USE_SSE 1
-  #ifndef __x86_64__
-    #undef RESAMPLE_SSEREGPARM
-    #define RESAMPLE_SSEREGPARM __attribute__((sseregparm))
-  #endif
-#else
-  // TODO: altivec here
-#endif
-
-namespace ResampleUtility
-{
- inline void kaiser_window(double* io, int count, double beta);
- inline void gen_sinc(double* out, int size, double cutoff, double kaiser);
- inline void gen_sinc_os(double* out, int size, double cutoff, double kaiser);
- inline void normalize(double* io, int size, double gain = 1.0);
-
- inline void* make_aligned(void* ptr, unsigned boundary);	// boundary must be a power of 2
-}
-
-class SincResampleHR
-{
- private:
-
- inline void Init(unsigned ratio_arg, double desired_bandwidth, double beta, double d);
-
- inline void write(resample_samp_t sample) RESAMPLE_SSEREGPARM;
- inline resample_samp_t read(void) RESAMPLE_SSEREGPARM;
- inline bool output_avail(void);
-
- private:
-
- inline resample_samp_t mac(const resample_samp_t *wave, const resample_coeff_t *coeff, unsigned count);
-
- unsigned ratio;
- unsigned num_convolutions;
-
- resample_coeff_t *coeffs;
- std::vector<unsigned char> coeffs_mem;
-
- // second half of ringbuffer should be copy of first half.
- resample_samp_t *rb;
- std::vector<unsigned char> rb_mem;
-
- signed rb_readpos;
- signed rb_writepos;
- signed rb_in;
- signed rb_eff_size;
-
- friend class SincResample;
-};
-
-class SincResample
-{
- public:
-
- enum
- {
-  QUALITY_LOW = 0,
-  QUALITY_MEDIUM = 2,
-  QUALITY_HIGH = 4
- };
-
- inline SincResample(double input_rate, double output_rate, double desired_bandwidth, unsigned quality = QUALITY_HIGH);
-
- inline void write(resample_samp_t sample) RESAMPLE_SSEREGPARM;
- inline resample_samp_t read(void) RESAMPLE_SSEREGPARM;
- inline bool output_avail(void);
-
- private:
-
- inline void Init(double input_rate, double output_rate, double desired_bandwidth, double beta, double d, unsigned pn_nume, unsigned phases_min);
-
- inline resample_samp_t mac(const resample_samp_t *wave, const resample_coeff_t *coeffs_a, const resample_coeff_t *coeffs_b, const double ffract, unsigned count) RESAMPLE_SSEREGPARM;
-
- unsigned num_convolutions;
- unsigned num_phases;
-
- unsigned step_int;
- double step_fract;
-
- double input_pos_fract;
-
-
- std::vector<resample_coeff_t *> coeffs;	// Pointers into coeff_mem.
- std::vector<unsigned char> coeff_mem;
-
-
- std::vector<resample_samp_t> rb;	// second half should be copy of first half.
- signed rb_readpos;
- signed rb_writepos;
- signed rb_in;
-
- bool hr_used;
- SincResampleHR hr;
-};
-
-
-//
-// Code:
-//
-//#include "resample.hpp"
-
-#if 0
-namespace bit
-{
-    inline unsigned round(unsigned x) {
-      if((x & (x - 1)) == 0) return x;
-      while(x & (x - 1)) x &= x - 1;
-      return x << 1;
-    }
-}
-#endif
-
-void SincResampleHR::Init(unsigned ratio_arg, double desired_bandwidth, double beta, double d)
-{
- const unsigned align_boundary = 16;
- std::vector<double> coeffs_tmp;
- double cutoff;	// 1.0 = f/2
-
- ratio = ratio_arg;
-
- //num_convolutions = ((unsigned)ceil(d / ((1.0 - desired_bandwidth) / ratio)) + 1) &~ 1;	// round up to be even
- num_convolutions = ((unsigned)ceil(d / ((1.0 - desired_bandwidth) / ratio)) | 1);
-
- cutoff = (1.0 / ratio) - (d / num_convolutions);
-
-//printf("%d %d %.20f\n", ratio, num_convolutions, cutoff);
- assert(num_convolutions > ratio);
-
-
- // Generate windowed sinc of POWER
- coeffs_tmp.resize(num_convolutions);
- //ResampleUtility::gen_sinc(&coeffs_tmp[0], num_convolutions, cutoff, beta);
- ResampleUtility::gen_sinc_os(&coeffs_tmp[0], num_convolutions, cutoff, beta);
- ResampleUtility::normalize(&coeffs_tmp[0], num_convolutions);
-
- // Copy from coeffs_tmp to coeffs~
- // We multiply many coefficients at a time in the mac loop, so make sure the last few that don't really
- // exist are allocated, zero'd mem.
-
- coeffs_mem.resize(((num_convolutions + 7) &~ 7) * sizeof(resample_coeff_t) + (align_boundary - 1));
- coeffs = (resample_coeff_t *)ResampleUtility::make_aligned(&coeffs_mem[0], align_boundary);
-
-
- for(unsigned i = 0; i < num_convolutions; i++)
-  coeffs[i] = coeffs_tmp[i];
-
- rb_eff_size = nall::bit::round(num_convolutions * 2) >> 1;
- rb_readpos = 0;
- rb_writepos = 0;
- rb_in = 0;
-
- rb_mem.resize(rb_eff_size * 2 * sizeof(resample_samp_t) + (align_boundary - 1));
- rb = (resample_samp_t *)ResampleUtility::make_aligned(&rb_mem[0], align_boundary);
-}
-
-
-inline bool SincResampleHR::output_avail(void)
-{
- return(rb_in >= (signed)num_convolutions);
-}
-
-inline void SincResampleHR::write(resample_samp_t sample)
-{
- assert(!output_avail());
-
- rb[rb_writepos] = sample;
- rb[rb_writepos + rb_eff_size] = sample;
- rb_writepos = (rb_writepos + 1) & (rb_eff_size - 1);
- rb_in++;
-}
-
-resample_samp_t SincResampleHR::mac(const resample_samp_t *wave, const resample_coeff_t *coeff, unsigned count)
-{
-#if SINCRESAMPLE_USE_SSE
- __m128 accum_veca[2] = { _mm_set1_ps(0), _mm_set1_ps(0) };
- 
- resample_samp_t accum;
-
- for(unsigned c = 0; c < count; c += 8)
- {
-  for(unsigned i = 0; i < 2; i++)
-  {
-   __m128 co[2];
-   __m128 w[2];
-
-   co[i] = _mm_load_ps(&coeff[c + i * 4]);
-   w[i] = _mm_load_ps(&wave[c + i * 4]);
-
-   w[i] = _mm_mul_ps(w[i], co[i]);
-
-   accum_veca[i] = _mm_add_ps(w[i], accum_veca[i]);
-  }
- }
-
- __m128 accum_vec = _mm_add_ps(accum_veca[0], accum_veca[1]); //_mm_add_ps(_mm_add_ps(accum_veca[0], accum_veca[1]), _mm_add_ps(accum_veca[2], accum_veca[3]));
-
- accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6)));
- accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (1 << 0) | (0 << 2) | (1 << 4) | (0 << 6)));
-
- _mm_store_ss(&accum, accum_vec);
-
- return accum;
-#else
- resample_samp_t accum[4] = { 0, 0, 0, 0 };
-
- for(unsigned c = 0; c < count; c+= 4)
- {
-  accum[0] += wave[c + 0] * coeff[c + 0];
-  accum[1] += wave[c + 1] * coeff[c + 1];
-  accum[2] += wave[c + 2] * coeff[c + 2];
-  accum[3] += wave[c + 3] * coeff[c + 3];
- }
-
- return (accum[0] + accum[1]) + (accum[2] + accum[3]);	// don't mess with parentheses(assuming compiler doesn't already, which it may...
-
-#endif
-}
-
-
-resample_samp_t SincResampleHR::read(void)
-{
- assert(output_avail());
- resample_samp_t ret;
-
- ret = mac(&rb[rb_readpos], &coeffs[0], num_convolutions);
-
- rb_readpos = (rb_readpos + ratio) & (rb_eff_size - 1);
- rb_in -= ratio;
-
- return ret;
-}
-
-
-SincResample::SincResample(double input_rate, double output_rate, double desired_bandwidth, unsigned quality)
-{
- const struct
- {
-  double beta;
-  double d;
-  unsigned pn_nume;
-  unsigned phases_min;
- } qtab[5] =
- {
-  { 5.658, 3.62, 4096, 4 },
-  { 6.764, 4.32, 8192, 4 },
-  { 7.865, 5.0, 16384, 8 },
-  { 8.960, 5.7, 32768, 16 },
-  { 10.056, 6.4, 65536, 32 }
- };
-
- // Sanity checks
- assert(ceil(input_rate) > 0);
- assert(ceil(output_rate) > 0);
- assert(ceil(input_rate / output_rate) <= 1024);
- assert(ceil(output_rate / input_rate) <= 1024);
-
- // The simplistic number-of-phases calculation code doesn't work well enough for when desired_bandwidth is close to 1.0 and when
- // upsampling.
- assert(desired_bandwidth >= 0.25 && desired_bandwidth < 0.96);
- assert(quality >= 0 && quality <= 4);
-
- hr_used = false;
-
-#if 1
- // Round down to the nearest multiple of 4(so wave buffer remains aligned)
- // It also adjusts the effective intermediate sampling rate up slightly, so that the upper frequencies below f/2
- // aren't overly attenuated so much.  In the future, we might want to do an FFT or something to choose the intermediate rate more accurately
- // to virtually eliminate over-attenuation.
- unsigned ioratio_rd = (unsigned)floor(input_rate / (output_rate * (1.0 + (1.0 - desired_bandwidth) / 2) )) & ~3;
-
- if(ioratio_rd >= 8)
- {
-  hr.Init(ioratio_rd, desired_bandwidth, qtab[quality].beta, qtab[quality].d); //10.056, 6.4); 
-  hr_used = true;
-
-  input_rate /= ioratio_rd;
- }
-#endif
-
- Init(input_rate, output_rate, desired_bandwidth, qtab[quality].beta, qtab[quality].d, qtab[quality].pn_nume, qtab[quality].phases_min);
-}
-
-void SincResample::Init(double input_rate, double output_rate, double desired_bandwidth, double beta, double d, unsigned pn_nume, unsigned phases_min)
-{
- const unsigned max_mult_atatime = 8;	// multiply "granularity".  must be power of 2.
- const unsigned max_mult_minus1 = (max_mult_atatime - 1);
- const unsigned conv_alignment_bytes = 16;	// must be power of 2
- const double input_to_output_ratio = input_rate / output_rate;
- const double output_to_input_ratio = output_rate / input_rate;
- double cutoff;		// 1.0 = input_rate / 2
- std::vector<double> coeff_init_buffer;
-
- // Round up num_convolutions to be even.
- if(output_rate > input_rate)
-  num_convolutions = ((unsigned)ceil(d / (1.0 - desired_bandwidth)) + 1) & ~1;
- else
-  num_convolutions = ((unsigned)ceil(d / (output_to_input_ratio * (1.0 - desired_bandwidth))) + 1) & ~1;
-
- if(output_rate > input_rate)	// Upsampling
-  cutoff = desired_bandwidth;
- else	// Downsampling
-  cutoff = output_to_input_ratio * desired_bandwidth;
-
- // Round up to be even.
- num_phases = (std::max<unsigned>(pn_nume / num_convolutions, phases_min) + 1) &~1;
-
- // Adjust cutoff to account for the multiple phases.
- cutoff = cutoff / num_phases;
-
- assert((num_convolutions & 1) == 0);
- assert((num_phases & 1) == 0);
-
-// fprintf(stderr, "num_convolutions=%u, num_phases=%u, total expected coeff byte size=%lu\n", num_convolutions, num_phases,
-//        (long)((num_phases + 2) * ((num_convolutions + max_mult_minus1) & ~max_mult_minus1) * sizeof(float) + conv_alignment_bytes));
-
- coeff_init_buffer.resize(num_phases * num_convolutions);
-
- coeffs.resize(num_phases + 1 + 1);
-
- coeff_mem.resize((num_phases + 1 + 1) * ((num_convolutions + max_mult_minus1) &~ max_mult_minus1) * sizeof(resample_coeff_t) + conv_alignment_bytes);
-
- // Assign aligned pointers into coeff_mem
- {
-  resample_coeff_t *base_ptr = (resample_coeff_t *)ResampleUtility::make_aligned(&coeff_mem[0], conv_alignment_bytes);
-
-  for(unsigned phase = 0; phase < (num_phases + 1 + 1); phase++)
-  {
-   coeffs[phase] = base_ptr + (((num_convolutions + max_mult_minus1) & ~max_mult_minus1) * phase);
-  }
- }
-
- ResampleUtility::gen_sinc(&coeff_init_buffer[0], num_phases * num_convolutions, cutoff, beta);
- ResampleUtility::normalize(&coeff_init_buffer[0], num_phases * num_convolutions, num_phases);
-
- // Reorder coefficients to allow for more efficient convolution.
- for(int phase = -1; phase < ((int)num_phases + 1); phase++)
- {
-  for(int conv = 0; conv < (int)num_convolutions; conv++)
-  {
-   double coeff;
-
-   if(phase == -1 && conv == 0)
-    coeff = 0;
-   else if(phase == (int)num_phases && conv == ((int)num_convolutions - 1))
-    coeff = 0;
-   else
-    coeff = coeff_init_buffer[conv * num_phases + phase];
-
-   coeffs[phase + 1][conv] = coeff;
-  }
- }
-
- // Free a bit of mem
- coeff_init_buffer.resize(0);
-
- step_int = floor(input_to_output_ratio);
- step_fract = input_to_output_ratio - step_int;
-
- input_pos_fract = 0;
-
- // Do NOT use rb.size() later in the code, since it'll include the padding.
- // We should only need one "max_mult_minus1" here, not two, since it won't matter if it over-reads(due to doing "max_mult_atatime" multiplications at a time
- // rather than just 1, in which case this over-read wouldn't happen), from the first half into the duplicated half,
- // since those corresponding coefficients will be zero anyway; this is just to handle the case of reading off the end of the duplicated half to
- // prevent illegal memory accesses.
- rb.resize(num_convolutions * 2 + max_mult_minus1);
-
- rb_readpos = 0;
- rb_writepos = 0;
- rb_in = 0;
-}
-
-resample_samp_t SincResample::mac(const resample_samp_t *wave, const resample_coeff_t *coeffs_a, const resample_coeff_t *coeffs_b, const double ffract, unsigned count)
-{
- resample_samp_t accum = 0;
-#if SINCRESAMPLE_USE_SSE
- __m128 accum_vec_a[2] = { _mm_set1_ps(0), _mm_set1_ps(0) };
- __m128 accum_vec_b[2] = { _mm_set1_ps(0), _mm_set1_ps(0) };
-
- for(unsigned c = 0; c < count; c += 8) //8) //4)
- {
-  __m128 coeff_a[2];
-  __m128 coeff_b[2];
-  __m128 w[2];
-  __m128 result_a[2], result_b[2];
-
-  for(unsigned i = 0; i < 2; i++)
-  {
-   coeff_a[i] = _mm_load_ps(&coeffs_a[c + (i * 4)]);
-   coeff_b[i] = _mm_load_ps(&coeffs_b[c + (i * 4)]);
-   w[i] = _mm_loadu_ps(&wave[c + (i * 4)]);
-
-   result_a[i] = _mm_mul_ps(coeff_a[i], w[i]);
-   result_b[i] = _mm_mul_ps(coeff_b[i], w[i]);
-
-   accum_vec_a[i] = _mm_add_ps(result_a[i], accum_vec_a[i]);
-   accum_vec_b[i] = _mm_add_ps(result_b[i], accum_vec_b[i]);
-  }
- }
-
- __m128 accum_vec, av_a, av_b;
- __m128 mult_a_vec = _mm_set1_ps(1.0 - ffract);
- __m128 mult_b_vec = _mm_set1_ps(ffract);
-
- av_a = _mm_mul_ps(mult_a_vec, /*accum_vec_a[0]);*/ _mm_add_ps(accum_vec_a[0], accum_vec_a[1]));
- av_b = _mm_mul_ps(mult_b_vec, /*accum_vec_b[0]);*/ _mm_add_ps(accum_vec_b[0], accum_vec_b[1]));
-
- accum_vec = _mm_add_ps(av_a, av_b);
-
- accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6)));
- accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (1 << 0) | (0 << 2) | (1 << 4) | (0 << 6)));
-
- _mm_store_ss(&accum, accum_vec);
-#else
- resample_coeff_t mult_a = 1.0 - ffract;
- resample_coeff_t mult_b = ffract;
-
- for(unsigned c = 0; c < count; c += 4)
- {
-  accum += wave[c + 0] * (coeffs_a[c + 0] * mult_a + coeffs_b[c + 0] * mult_b);
-  accum += wave[c + 1] * (coeffs_a[c + 1] * mult_a + coeffs_b[c + 1] * mult_b);
-  accum += wave[c + 2] * (coeffs_a[c + 2] * mult_a + coeffs_b[c + 2] * mult_b);
-  accum += wave[c + 3] * (coeffs_a[c + 3] * mult_a + coeffs_b[c + 3] * mult_b);
- }
-#endif
-
- return accum;
-}
-
-inline bool SincResample::output_avail(void)
-{
- return(rb_in >= (int)num_convolutions);
-}
-
-resample_samp_t SincResample::read(void)
-{
- assert(output_avail());
- double phase = input_pos_fract * num_phases - 0.5;
- signed phase_int = (signed)floor(phase);
- double phase_fract = phase - phase_int;
- unsigned phase_a = num_phases - 1 - phase_int;
- unsigned phase_b = phase_a - 1;
- resample_samp_t ret;
-
- ret = mac(&rb[rb_readpos], &coeffs[phase_a + 1][0], &coeffs[phase_b + 1][0], phase_fract, num_convolutions);
-
- unsigned int_increment = step_int;
-
- input_pos_fract += step_fract;
- int_increment += floor(input_pos_fract);
- input_pos_fract -= floor(input_pos_fract);
-
- rb_readpos = (rb_readpos + int_increment) % num_convolutions;
- rb_in -= int_increment;
-
- return ret;
-}
-
-inline void SincResample::write(resample_samp_t sample)
-{
- assert(!output_avail());
-
- if(hr_used)
- {
-  hr.write(sample);
-
-  if(hr.output_avail())
-  {
-   sample = hr.read();
-  }
-  else
-  {
-   return;
-  }
- }
-
- rb[rb_writepos + 0 * num_convolutions] = sample;
- rb[rb_writepos + 1 * num_convolutions] = sample;
- rb_writepos = (rb_writepos + 1) % num_convolutions;
- rb_in++;
-}
-
-void ResampleUtility::kaiser_window( double* io, int count, double beta)
-{
-        int const accuracy = 24; //16; //12;
-
-        double* end = io + count;
-
-        double beta2    = beta * beta * (double) -0.25;
-        double to_fract = beta2 / ((double) count * count);
-        double i        = 0;
-        double rescale = 0; // Doesn't need an initializer, to shut up gcc
-
-        for ( ; io < end; ++io, i += 1 )
-        {
-                double x = i * i * to_fract - beta2;
-                double u = x;
-                double k = x + 1;
-
-                double n = 2;
-                do
-                {
-                        u *= x / (n * n);
-                        n += 1;
-                        k += u;
-                }
-                while ( k <= u * (1 << accuracy) );
-
-                if ( !i )
-                        rescale = 1 / k; // otherwise values get large
-
-                *io *= k * rescale;
-        }
-}
-
-void ResampleUtility::gen_sinc(double* out, int size, double cutoff, double kaiser)
-{
-        assert( size % 2 == 0 ); // size must be even
- 
-        int const half_size = size / 2;
-        double* const mid = &out [half_size];
- 
-        // Generate right half of sinc
-        for ( int i = 0; i < half_size; i++ )
-        {
-                double angle = (i * 2 + 1) * (Math::Pi / 2);
-                mid [i] = sin( angle * cutoff ) / angle;
-        }
- 
-        kaiser_window( mid, half_size, kaiser );
- 
-        // Mirror for left half
-        for ( int i = 0; i < half_size; i++ )
-                out [i] = mid [half_size - 1 - i];
-}
-
-void ResampleUtility::gen_sinc_os(double* out, int size, double cutoff, double kaiser)
-{
-        assert( size % 2 == 1); // size must be odd
- 
-	for(int i = 0; i < size; i++)
-	{
-         if(i == (size / 2))
-          out[i] = 2 * Math::Pi * (cutoff / 2); //0.078478; //1.0; //sin(2 * M_PI * (cutoff / 2) * (i - size / 2)) / (i - (size / 2));
-	 else
- 	  out[i] = sin(2 * Math::Pi * (cutoff / 2) * (i - size / 2)) / (i - (size / 2));
-
-//	 out[i] *= 0.3635819 - 0.4891775 * cos(2 * M_PI * i / (size - 1)) + 0.1365995 * cos(4 * M_PI * i / (size - 1)) - 0.0106411 * cos(6 * M_PI * i / (size - 1));
-//0.42 - 0.5 * cos(2 * M_PI * i / (size - 1)) + 0.08 * cos(4 * M_PI * i / (size - 1));
-
-//         printf("%d %f\n", i, out[i]);
-	}
-
-	kaiser_window(&out[size / 2], size / 2 + 1, kaiser);
-
-        // Mirror for left half
-        for ( int i = 0; i < size / 2; i++ )
-                out [i] = out [size - 1 - i];
-
-}
-
-void ResampleUtility::normalize(double* io, int size, double gain)
-{
-        double sum = 0;
-        for ( int i = 0; i < size; i++ )
-                sum += io [i];
-
-        double scale = gain / sum;
-        for ( int i = 0; i < size; i++ )
-                io [i] *= scale;
-}
-
-void* ResampleUtility::make_aligned(void* ptr, unsigned boundary)
-{
- unsigned char* null_ptr = (unsigned char *)nullptr;
- unsigned char* uc_ptr = (unsigned char *)ptr;
-
- uc_ptr += (boundary - ((uc_ptr - null_ptr) & (boundary - 1))) & (boundary - 1);
-
- //while((uc_ptr - null_ptr) & (boundary - 1))
- // uc_ptr++;
-
- //printf("%16llx %16llx\n", (unsigned long long)ptr, (unsigned long long)uc_ptr);
-
- assert((uc_ptr - (unsigned char *)ptr) < boundary && (uc_ptr >= (unsigned char *)ptr));
-
- return uc_ptr;
-}
--- a/higan/audio/resample/linear.hpp
+++ b/higan/audio/resample/linear.hpp
@ -1,43 +0,0 @@
-#pragma once
-
-struct ResampleLinear : Resampler {
-  ResampleLinear(DSP& dsp) : Resampler(dsp) {}
-
-  inline auto setFrequency() -> void;
-  inline auto clear() -> void;
-  inline auto sample() -> void;
-
-private:
-  double fraction;
-  double step;
-};
-
-auto ResampleLinear::setFrequency() -> void {
-  fraction = 0.0;
-  step = dsp.settings.frequency / frequency;
-}
-
-auto ResampleLinear::clear() -> void {
-  fraction = 0.0;
-}
-
-auto ResampleLinear::sample() -> void {
-  while(fraction <= 1.0) {
-    double channel[dsp.settings.channels];
-
-    for(auto n : range(dsp.settings.channels)) {
-      double a = dsp.buffer.read(n, -1);
-      double b = dsp.buffer.read(n, -0);
-
-      double mu = fraction;
-
-      channel[n] = a * (1.0 - mu) + b * mu;
-    }
-
-    dsp.write(channel);
-    fraction += step;
-  }
-
-  dsp.buffer.rdoffset++;
-  fraction -= 1.0;
-}
--- a/higan/audio/resample/nearest.hpp
+++ b/higan/audio/resample/nearest.hpp
@ -1,43 +0,0 @@
-#pragma once
-
-struct ResampleNearest : Resampler {
-  ResampleNearest(DSP& dsp) : Resampler(dsp) {}
-
-  inline auto setFrequency() -> void;
-  inline auto clear() -> void;
-  inline auto sample() -> void;
-
-private:
-  double fraction;
-  double step;
-};
-
-auto ResampleNearest::setFrequency() -> void {
-  fraction = 0.0;
-  step = dsp.settings.frequency / frequency;
-}
-
-auto ResampleNearest::clear() -> void {
-  fraction = 0.0;
-}
-
-auto ResampleNearest::sample() -> void {
-  while(fraction <= 1.0) {
-    double channel[dsp.settings.channels];
-
-    for(auto n : range(dsp.settings.channels)) {
-      double a = dsp.buffer.read(n, -1);
-      double b = dsp.buffer.read(n, -0);
-
-      double mu = fraction;
-
-      channel[n] = mu < 0.5 ? a : b;
-    }
-
-    dsp.write(channel);
-    fraction += step;
-  }
-
-  dsp.buffer.rdoffset++;
-  fraction -= 1.0;
-}
--- a/higan/audio/resample/sinc.hpp
+++ b/higan/audio/resample/sinc.hpp
@ -1,62 +0,0 @@
-#pragma once
-
-#include "lib/sinc.hpp"
-
-struct ResampleSinc : Resampler {
-  inline ResampleSinc(DSP& dsp);
-  inline ~ResampleSinc();
-
-  inline auto setFrequency() -> void;
-  inline auto clear() -> void;
-  inline auto sample() -> void;
-
-private:
-  inline void remakeSinc();
-  SincResample* sincResampler[8] = {0};
-};
-
-ResampleSinc::ResampleSinc(DSP& dsp) : Resampler(dsp) {
-  for(auto n : range(8)) {
-    sincResampler[n] = nullptr;
-  }
-}
-
-ResampleSinc::~ResampleSinc() {
-  for(auto n : range(8)) {
-    if(sincResampler[n]) delete sincResampler[n];
-  }
-}
-
-auto ResampleSinc::setFrequency() -> void {
-  remakeSinc();
-}
-
-auto ResampleSinc::clear() -> void {
-  remakeSinc();
-}
-
-auto ResampleSinc::sample() -> void {
-  for(auto c : range(dsp.settings.channels)) {
-    sincResampler[c]->write(dsp.buffer.read(c));
-  }
-
-  if(sincResampler[0]->output_avail()) {
-    do {
-      for(auto c : range(dsp.settings.channels)) {
-        dsp.output.write(c) = sincResampler[c]->read();
-      }
-      dsp.output.wroffset++;
-    } while(sincResampler[0]->output_avail());
-  }
-
-  dsp.buffer.rdoffset++;
-}
-
-auto ResampleSinc::remakeSinc() -> void {
-  assert(dsp.settings.channels < 8);
-
-  for(auto c : range(dsp.settings.channels)) {
-    if(sincResampler[c]) delete sincResampler[c];
-    sincResampler[c] = new SincResample(dsp.settings.frequency, frequency, 0.85, SincResample::QUALITY_HIGH);
-  }
-}
--- a/higan/audio/settings.hpp
+++ b/higan/audio/settings.hpp
@ -1,46 +0,0 @@
-#pragma once
-
-auto DSP::setChannels(uint channels) -> void {
-  channels = max(1u, channels);
-  buffer.setChannels(channels);
-  output.setChannels(channels);
-  settings.channels = channels;
-}
-
-auto DSP::setPrecision(uint precision) -> void {
-  settings.precision = precision;
-  settings.intensity = 1 << (settings.precision - 1);
-  settings.intensityInverse = 1.0 / settings.intensity;
-}
-
-auto DSP::setFrequency(double frequency) -> void {
-  settings.frequency = frequency;
-  resampler->setFrequency();
-}
-
-auto DSP::setVolume(double volume) -> void {
-  settings.volume = volume;
-}
-
-auto DSP::setBalance(double balance) -> void {
-  settings.balance = balance;
-}
-
-auto DSP::setResampler(ResampleEngine engine) -> void {
-  if(resampler) delete resampler;
-
-  switch(engine) { default:
-  case ResampleEngine::Nearest: resampler = new ResampleNearest(*this); return;
-  case ResampleEngine::Linear:  resampler = new ResampleLinear (*this); return;
-  case ResampleEngine::Cosine:  resampler = new ResampleCosine (*this); return;
-  case ResampleEngine::Cubic:   resampler = new ResampleCubic  (*this); return;
-  case ResampleEngine::Hermite: resampler = new ResampleHermite(*this); return;
-  case ResampleEngine::Average: resampler = new ResampleAverage(*this); return;
-  case ResampleEngine::Sinc:    resampler = new ResampleSinc   (*this); return;
-  }
-}
-
-auto DSP::setResamplerFrequency(double frequency) -> void {
-  resampler->frequency = frequency;
-  resampler->setFrequency();
-}
--- a/higan/audio/stream.cpp
+++ b/higan/audio/stream.cpp
@ -0,0 +1,146 @@
+//Emulator::Stream implements advanced audio resampling
+//First, a lowpass sinc filter is used (with a Blackman window to reduce rippling) in order to remove aliasing
+//Second, a decimator is used to reduce the CPU overhead of the sinc function
+//Finally, a hermite resampler is used to resample to the exact requested output frequency
+//Note: when the cutoff frequency (output frequency / input frequency) is >= 0.5, the sinc filter is reduced to a single pass-through tap and only the hermite resampler is used
+
+//Stores the channel count and the input sample rate.
+//No buffers are allocated here; setFrequency() performs all allocation.
+Stream::Stream(uint channels, double inputFrequency)
+: channels(channels), inputFrequency(inputFrequency) {
+}
+
+//Frees every buffer owned by the stream by delegating to reset()
+Stream::~Stream() { reset(); }
+
+//Releases the filter kernel and all per-channel buffers, leaving every pointer null.
+//Safe to call when nothing has been allocated yet.
+auto Stream::reset() -> void {
+  //frees each channel's array (when the table exists), then the pointer table itself
+  auto release = [&](auto& buffers) {
+    if(buffers) {
+      for(auto c : range(channels)) delete[] buffers[c];
+    }
+    delete[] buffers;
+    buffers = nullptr;
+  };
+
+  delete[] tap;  //delete[] on a null pointer is a no-op
+  tap = nullptr;
+
+  release(input);
+  release(queue);
+  release(output);
+}
+
+//Reconfigures the stream for a new output sample rate.
+//outputFrequency_: requested output rate, in the same unit as inputFrequency.
+//Discards all previous state via reset(), then rebuilds:
+//  - the lowpass kernel (Blackman-windowed sinc, or a single pass-through tap)
+//  - the decimation rate and the hermite resampler step
+//  - zero-filled input history, hermite queue, and output ring buffers
+auto Stream::setFrequency(double outputFrequency_) -> void {
+  reset();
+
+  //full double precision: a truncated constant skews every sin()/cos() argument in the kernel
+  const double pi = 3.14159265358979323846;
+  auto sinc = [&](double x) -> double {
+    if(x == 0) return 1;  //limit of sin(pi * x) / (pi * x) as x approaches zero
+    return sin(pi * x) / (pi * x);
+  };
+
+  outputFrequency = outputFrequency_;
+  cutoffFrequency = outputFrequency / inputFrequency;
+  //NOTE(review): sinc(2 * cutoff * n) places the cutoff at outputFrequency itself rather than
+  //at outputFrequency / 2 (the output Nyquist limit) -- confirm this is intended
+  if(cutoffFrequency < 0.5) {
+    double transitionBandwidth = 0.008;  //lower = higher quality; more taps (slower)
+    taps = (uint)ceil(4.0 / transitionBandwidth) | 1;  //force an odd count so the kernel has a center tap
+    tap = new double[taps];
+
+    double sum = 0.0;
+    for(uint t : range(taps)) {
+      //sinc filter, centered on the middle tap
+      double s = sinc(2.0 * cutoffFrequency * (t - (taps - 1) / 2.0));
+
+      //blackman window
+      double b = 0.42 - 0.5 * cos(2.0 * pi * t / (taps - 1)) + 0.08 * cos(4.0 * pi * t / (taps - 1));
+
+      tap[t] = s * b;
+      sum += tap[t];
+    }
+
+    //normalize so that the sum of all coefficients is 1.0
+    for(auto t : range(taps)) tap[t] /= sum;
+  } else {
+    //no lowpass: a single unity tap passes the input through unchanged
+    taps = 1;
+    tap = new double[taps];
+    tap[0] = 1.0;
+  }
+
+  //keep one filtered sample out of every decimationRate inputs (never fewer than every sample)
+  decimationRate = max(1u, (uint)floor(inputFrequency / outputFrequency));
+  decimationOffset = 0;
+
+  //each channel's history holds taps * 2 entries; write() stores every sample twice
+  input = new double*[channels];
+  for(auto c : range(channels)) input[c] = new double[taps * 2]();
+  inputOffset = 0;
+
+  resamplerFrequency = inputFrequency / decimationRate;
+  resamplerFraction = 0.0;
+  resamplerStep = resamplerFrequency / outputFrequency;
+  queue = new double*[channels];
+  for(auto c : range(channels)) queue[c] = new double[4]();  //four samples for the 4-tap hermite
+
+  //output ring buffer sized to inputFrequency * 0.02 frames; clamp so the ring is never empty
+  output = new double*[channels];
+  outputs = max(1u, (uint)(inputFrequency * 0.02));
+  for(auto c : range(channels)) output[c] = new double[outputs]();
+  outputReadOffset = 0;
+  outputWriteOffset = 0;
+}
+
+//Returns true while the output ring buffer's read and write positions differ
+auto Stream::pending() const -> bool {
+  const bool drained = outputWriteOffset == outputReadOffset;
+  return !drained;
+}
+
+//Copies one frame from the output ring buffer into samples[] and advances the read position.
+//samples[] needs room for at least two values: a one-channel stream also fills samples[1].
+auto Stream::read(double* samples) -> void {
+  for(uint c = 0; c < channels; c++) {
+    samples[c] = output[c][outputReadOffset];
+  }
+
+  //monaural->stereo hack
+  if(channels == 1) {
+    samples[1] = samples[0];
+  }
+
+  //advance, wrapping back to the start of the ring
+  outputReadOffset++;
+  if(outputReadOffset >= outputs) outputReadOffset = 0;
+}
+
+//Accepts one input frame: samples[] holds one signed 16-bit value per channel.
+//Each frame is stored in the FIR history. On every decimationRate-th frame the history is
+//convolved with the tap kernel, the result is pushed into the four-sample hermite queue,
+//and output frames are interpolated into the output ring buffer while the resampler
+//fraction is <= 1.0. audio.poll() is called once after each output frame is written.
+auto Stream::write(int16* samples) -> void {
+  //every sample is stored twice (at offset and at offset + taps) so that the convolution
+  //below can read taps consecutive entries without wrapping
+  inputOffset = !inputOffset ? taps - 1 : inputOffset - 1;
+  for(auto c : range(channels)) {
+    auto sample = (samples[c] + 32768.0) / 65535.0;  //normalize to 0.0 - 1.0
+    input[c][inputOffset] = input[c][inputOffset + taps] = sample;
+  }
+
+  //decimator: only every decimationRate-th frame continues past this point
+  if(++decimationOffset < decimationRate) return;
+  decimationOffset = 0;
+
+  for(auto c : range(channels)) {
+    //lowpass filter: dot product of the history with the kernel
+    double sample = 0.0;
+    for(auto t : range(taps)) sample += input[c][inputOffset + t] * tap[t];
+
+    //shift the newest filtered sample into the hermite queue
+    auto& q = queue[c];
+    q[0] = q[1];
+    q[1] = q[2];
+    q[2] = q[3];
+    q[3] = sample;
+  }
+
+  //4-tap hermite
+  const double tension = 0.0;  //-1 = low, 0 = normal, +1 = high
+  const double bias    = 0.0;  //-1 = left, 0 = even, +1 = right
+
+  auto& mu = resamplerFraction;
+  while(mu <= 1.0) {
+    //the basis weights depend only on mu, so they are computed once per output frame
+    //rather than once per channel
+    double mu1 = mu;
+    double mu2 = mu * mu;
+    double mu3 = mu * mu * mu;
+
+    double a0 = +2 * mu3 - 3 * mu2 + 1;
+    double a1 =      mu3 - 2 * mu2 + mu1;
+    double a2 =      mu3 -     mu2;
+    double a3 = -2 * mu3 + 3 * mu2;
+
+    for(auto c : range(channels)) {
+      auto& q = queue[c];
+
+      //tangents at q[1] and q[2]
+      double m0 = (q[1] - q[0]) * (1.0 + bias) * (1.0 - tension) / 2.0
+                + (q[2] - q[1]) * (1.0 - bias) * (1.0 - tension) / 2.0;
+      double m1 = (q[2] - q[1]) * (1.0 + bias) * (1.0 - tension) / 2.0
+                + (q[3] - q[2]) * (1.0 - bias) * (1.0 - tension) / 2.0;
+
+      output[c][outputWriteOffset] = (a0 * q[1]) + (a1 * m0) + (a2 * m1) + (a3 * q[2]);
+    }
+
+    if(++outputWriteOffset >= outputs) outputWriteOffset = 0;
+    mu += resamplerStep;
+    audio.poll();
+  }
+
+  //carry the remainder into the next decimated frame
+  mu -= 1.0;
+}
--- a/higan/emulator/emulator.hpp
+++ b/higan/emulator/emulator.hpp
@ -8,7 +8,7 @@ using namespace nall;

 namespace Emulator {
  static const string Name = "higan";
-  static const string Version = "098.06";
+  static const string Version = "098.07";
  static const string Author = "byuu";
  static const string License = "GPLv3";
  static const string Website = "http://byuu.org/";
--- a/higan/fc/apu/apu.cpp
+++ b/higan/fc/apu/apu.cpp
@ -57,7 +57,7 @@ auto APU::main() -> void {
 //output  = filter.run_lopass(output);
  output  = sclamp<16>(output);

-  stream->sample(output, output);
+  stream->sample(output);

  tick();
 }
@ -89,7 +89,7 @@ auto APU::power() -> void {

 auto APU::reset() -> void {
  create(APU::Enter, 21'477'272);
-  stream = Emulator::audio.createStream(21'477'272.0 / 12.0);
+  stream = Emulator::audio.createStream(1, 21'477'272.0 / 12.0);

  pulse[0].reset();
  pulse[1].reset();
--- a/higan/gb/apu/apu.cpp
+++ b/higan/gb/apu/apu.cpp
@ -62,7 +62,7 @@ auto APU::hipass(int16& sample, int64& bias) -> void {

 auto APU::power() -> void {
  create(Enter, 2 * 1024 * 1024);
-  if(!system.sgb()) stream = Emulator::audio.createStream(2 * 1024 * 1024);
+  if(!system.sgb()) stream = Emulator::audio.createStream(2, 2 * 1024 * 1024);
  for(uint n = 0xff10; n <= 0xff3f; n++) bus.mmio[n] = this;

  square1.power();
--- a/higan/gb/cartridge/mbc1m/mbc1m.cpp
+++ b/higan/gb/cartridge/mbc1m/mbc1m.cpp
@ -1,10 +1,15 @@
 auto Cartridge::MBC1M::mmio_read(uint16 addr) -> uint8 {
  if((addr & 0xc000) == 0x0000) {  //$0000-3fff
-    return cartridge.rom_read((romHi << 4) * 0x4000 + addr.bits(0,13));
+    if(!modeSelect) return cartridge.rom_read(addr & 0x3fff);
+    return cartridge.rom_read((romHi << 4) * 0x4000 + (addr & 0x3fff));
  }

  if((addr & 0xc000) == 0x4000) {  //$4000-7fff
-    return cartridge.rom_read((romHi << 4 | romLo) * 0x4000 + addr.bits(0,13));
+    return cartridge.rom_read((romHi << 4 | romLo) * 0x4000 + (addr & 0x3fff));
+  }
+
+  if((addr & 0xe000) == 0xa000) {  //$a000-bfff
+    return cartridge.ram_read(addr & 0x1fff);
  }

  return 0xff;
@ -18,9 +23,18 @@ auto Cartridge::MBC1M::mmio_write(uint16 addr, uint8 data) -> void {
  if((addr & 0xe000) == 0x4000) {  //$4000-5fff
    romHi = data.bits(0,1);
  }
+
+  if((addr & 0xe000) == 0x6000) {  //$6000-7fff
+    modeSelect = data.bit(0);
+  }
+
+  if((addr & 0xe000) == 0xa000) {  //$a000-bfff
+    cartridge.ram_write(addr & 0x1fff, data);
+  }
 }

 auto Cartridge::MBC1M::power() -> void {
  romHi = 0;
  romLo = 1;
+  modeSelect = 0;  //0 = mmio_read serves $0000-3fff from the start of ROM, ignoring romHi
 }
--- a/higan/gb/cartridge/mbc1m/mbc1m.hpp
+++ b/higan/gb/cartridge/mbc1m/mbc1m.hpp
@ -5,4 +5,5 @@ struct MBC1M : MMIO {

  uint4 romLo;
  uint2 romHi;
+  uint1 modeSelect;
 } mbc1m;
--- a/higan/gb/cartridge/serialization.cpp
+++ b/higan/gb/cartridge/serialization.cpp
@ -9,6 +9,7 @@ auto Cartridge::serialize(serializer& s) -> void {

  s.integer(mbc1m.romLo);
  s.integer(mbc1m.romHi);
+  s.integer(mbc1m.modeSelect);

  s.integer(mbc2.ram_enable);
  s.integer(mbc2.rom_select);
--- a/higan/gba/apu/apu.cpp
+++ b/higan/gba/apu/apu.cpp
@ -74,7 +74,7 @@ auto APU::step(uint clocks) -> void {

 auto APU::power() -> void {
  create(APU::Enter, 16'777'216);
-  stream = Emulator::audio.createStream(16'777'216.0 / 512.0);
+  stream = Emulator::audio.createStream(2, 16'777'216.0 / 512.0);

  square1.power();
  square2.power();
--- a/higan/sfc/coprocessor/icd2/icd2.cpp
+++ b/higan/sfc/coprocessor/icd2/icd2.cpp
@ -54,7 +54,7 @@ auto ICD2::power() -> void {

 auto ICD2::reset(bool soft) -> void {
  create(ICD2::Enter, cpu.frequency / 5);
-  if(!soft) stream = Emulator::audio.createStream(4194304.0 / 2.0);
+  if(!soft) stream = Emulator::audio.createStream(2, 4194304.0 / 2.0);

  r6003 = 0x00;
  r6004 = 0xff;
--- a/higan/sfc/coprocessor/msu1/msu1.cpp
+++ b/higan/sfc/coprocessor/msu1/msu1.cpp
@ -59,7 +59,7 @@ auto MSU1::power() -> void {

 auto MSU1::reset() -> void {
  create(MSU1::Enter, 44100);
-  stream = Emulator::audio.createStream(44100.0);
+  stream = Emulator::audio.createStream(2, 44100.0);

  mmio.dataSeekOffset = 0;
  mmio.dataReadOffset = 0;
--- a/higan/sfc/dsp/dsp.cpp
+++ b/higan/sfc/dsp/dsp.cpp
@ -241,7 +241,7 @@ auto DSP::power() -> void {

 auto DSP::reset() -> void {
  create(Enter, system.apuFrequency());
-  stream = Emulator::audio.createStream(system.apuFrequency() / 768.0);
+  stream = Emulator::audio.createStream(2, system.apuFrequency() / 768.0);

  REG(FLG) = 0xe0;
  state.noise = 0x4000;
--- a/higan/ws/apu/apu.cpp
+++ b/higan/ws/apu/apu.cpp
@ -66,7 +66,7 @@ auto APU::step(uint clocks) -> void {

 auto APU::power() -> void {
  create(APU::Enter, 3'072'000);
-  stream = Emulator::audio.createStream(3'072'000.0);
+  stream = Emulator::audio.createStream(2, 3'072'000.0);

  bus.map(this, 0x004a, 0x004c);
  bus.map(this, 0x004e, 0x0050);
--- a/ruby/audio/xaudio2.cpp
+++ b/ruby/audio/xaudio2.cpp
@ -1,5 +1,6 @@
 #include "xaudio2.hpp"
 #include <windows.h>
+#include <audioclient.h>

 struct AudioXAudio2 : Audio, public IXAudio2VoiceCallback {
  ~AudioXAudio2() { term(); }