restructure main emulator loop to be driven by the amount of audio available in the aica ring buffer (#1)

made the audio backend discard a number of incoming aica frames equal to the silence that was previously written out, keeping the audio time domain in sync with the emulator
Anthony Pesch 2017-01-04 00:45:47 -08:00
parent 3f8d06cdf2
commit 93cde7f1d0
5 changed files with 106 additions and 73 deletions
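
Taken together, the change makes the core thread pace emulation off of the audio ring buffer rather than the host clock. Below is a condensed, self-contained sketch of that loop for orientation; the helper names mirror functions from the diff that follows, but their signatures are simplified here (the struct handles are dropped) and they are declared as externals so the sketch compiles as a standalone translation unit. prof_update and error handling are omitted.

#include <stdint.h>

#define NS_PER_SEC INT64_C(1000000000)
#define MACHINE_STEP (NS_PER_SEC / 1000) /* 1 ms of guest time per tick */

/* provided by the audio backend / emulator in the diff below (handles
   dropped for brevity) */
int audio_buffer_low(void);
void audio_pump_events(void);
void dc_tick(int64_t ns);
int64_t time_nanoseconds(void);

void core_loop(volatile int *running) {
  int64_t next_pump_time = 0;

  while (*running) {
    int64_t current_time = time_nanoseconds();

    /* emulation is paced by the consumer: generate guest time (and with it
       aica frames) only while the audio ring buffer is below its low-water
       mark */
    while (audio_buffer_low()) {
      dc_tick(MACHINE_STEP);
    }

    /* audio events only cover device connections, so check infrequently */
    if (current_time > next_pump_time) {
      audio_pump_events();
      next_pump_time = current_time + NS_PER_SEC;
    }
  }
}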

View File

@ -7,6 +7,7 @@ struct audio_backend;
struct audio_backend *audio_create(struct aica *aica);
void audio_destroy(struct audio_backend *audio);
int audio_buffer_low(struct audio_backend *audio);
void audio_pump_events(struct audio_backend *audio);
#endif

View File

@ -9,6 +9,7 @@ struct audio_backend {
struct SoundIo *soundio;
struct SoundIoDevice *device;
struct SoundIoOutStream *outstream;
int frames_silenced;
};
static void audio_write_callback(struct SoundIoOutStream *outstream,
@ -18,11 +19,21 @@ static void audio_write_callback(struct SoundIoOutStream *outstream,
struct SoundIoChannelArea *areas;
int err;
/* if any frames were silenced previously in order to prevent an underflow,
discard the same number of incoming aica frames to keep the audio time
domain in sync with the emulator */
while (audio->frames_silenced) {
int skipped = aica_skip_frames(audio->aica, audio->frames_silenced);
if (!skipped) {
break;
}
audio->frames_silenced -= skipped;
}
uint32_t frames[10];
int16_t *samples = (int16_t *)frames;
int frames_remaining = frame_count_max;
int frames_available = aica_available_frames(audio->aica);
int frames_remaining = MIN(frames_available, frame_count_max);
int frames_silence = frame_count_max - frames_remaining;
while (frames_remaining > 0) {
int frame_count = frames_remaining;
@ -38,22 +49,29 @@ static void audio_write_callback(struct SoundIoOutStream *outstream,
}
for (int frame = 0; frame < frame_count;) {
/* batch read frames from aica */
int n = MIN(frame_count - frame, array_size(frames));
int read = aica_read_frames(audio->aica, frames, n);
CHECK_EQ(read, n);
if (frames_available > 0) {
/* batch read frames from aica */
n = aica_read_frames(audio->aica, frames, n);
frames_available -= n;
} else {
/* write out silence */
memset(frames, 0, sizeof(frames));
audio->frames_silenced += n;
}
/* copy frames to output stream */
for (int channel = 0; channel < layout->channel_count; channel++) {
struct SoundIoChannelArea *area = &areas[channel];
for (int i = 0; i < read; i++) {
for (int i = 0; i < n; i++) {
int16_t *ptr = (int16_t *)(area->ptr + area->step * (frame + i));
*ptr = samples[channel + 2 * i];
}
}
frame += read;
frame += n;
}
if ((err = soundio_outstream_end_write(outstream))) {
@ -63,35 +81,6 @@ static void audio_write_callback(struct SoundIoOutStream *outstream,
frames_remaining -= frame_count;
}
while (frames_silence > 0) {
int frame_count = frames_silence;
if ((err = soundio_outstream_begin_write(outstream, &areas, &frame_count))) {
LOG_WARNING("Error writing to output stream: %s", soundio_strerror(err));
return;
}
if (!frame_count) {
break;
}
for (int channel = 0; channel < layout->channel_count; channel++) {
struct SoundIoChannelArea *area = &areas[channel];
for (int i = 0; i < frame_count; i++) {
int16_t *ptr = (int16_t *)(area->ptr + area->step * i);
*ptr = 0;
}
}
if ((err = soundio_outstream_end_write(outstream))) {
LOG_WARNING("Error writing to output stream: %s", soundio_strerror(err));
return;
}
frames_silence -= frame_count;
}
}
void audio_underflow_callback(struct SoundIoOutStream *outstream) {
@ -102,6 +91,11 @@ void audio_pump_events(struct audio_backend *audio) {
soundio_flush_events(audio->soundio);
}
int audio_buffer_low(struct audio_backend *audio) {
int low_water_mark = (int)(44100.0f * (OPTION_latency / 1000.0f));
return aica_available_frames(audio->aica) <= low_water_mark;
}
void audio_destroy(struct audio_backend *audio) {
if (audio->outstream) {
soundio_outstream_destroy(audio->outstream);
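
For reference, the audio_buffer_low check above converts the latency option, given in milliseconds, into a frame count at the 44100 Hz output rate used by the backend. A small worked example, using a purely hypothetical 100 ms latency value (not the option's actual default):

#include <stdio.h>

/* mirrors the low-water mark arithmetic in audio_buffer_low; the 100 ms
   latency value below is only an example */
static int low_water_mark(float latency_ms) {
  return (int)(44100.0f * (latency_ms / 1000.0f));
}

int main(void) {
  printf("%d\n", low_water_mark(100.0f)); /* 44100 * 0.1 = 4410 frames */
  return 0;
}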

View File

@ -17,8 +17,6 @@
#include "ui/nuklear.h"
#include "ui/window.h"
DEFINE_OPTION_INT(throttle, 1,
"Throttle emulation speed to match the original hardware");
DEFINE_AGGREGATE_COUNTER(frames);
struct emu {
@ -110,15 +108,6 @@ static void emu_debug_menu(void *data, struct nk_context *ctx) {
frames, ta_renders, pvr_vblanks, sh4_instrs, arm7_instrs);
win_set_status(emu->window, status);
/* add drop down menus */
nk_layout_row_push(ctx, 70.0f);
if (nk_menu_begin_label(ctx, "EMULATOR", NK_TEXT_LEFT,
nk_vec2(140.0f, 200.0f))) {
nk_layout_row_dynamic(ctx, DEBUG_MENU_HEIGHT, 1);
nk_checkbox_label(ctx, "throttled", &OPTION_throttle);
nk_menu_end(ctx);
}
dc_debug_menu(emu->dc, ctx);
}
@ -154,9 +143,8 @@ static void emu_close(void *data) {
emu->running = 0;
}
static void *emu_audio_thread(void *data) {
static void *emu_core_thread(void *data) {
struct emu *emu = data;
struct audio_backend *audio = audio_create(emu->dc->aica);
if (!audio) {
@ -164,44 +152,85 @@ static void *emu_audio_thread(void *data) {
return 0;
}
while (emu->running) {
audio_pump_events(audio);
/* main emulation loop
/* audio_pump_events just checks for device changes, there's no need to
spin */
sleep(1);
}
unlike the real machine, which runs multiple hardware devices in parallel,
all of the emulated hardware in redream is run synchronously, in a
cooperative multitasking fashion. this removes numerous complexities from
the c code, as well as from the runtime-generated code.
audio_destroy(audio);
on creation, each hardware device registers itself with the scheduler
interface. this scheduler interface is used by dc_tick to run each device
for the specified slice of guest time. bearing in mind that each device is
run synchronously, this slice should be small enough that devices waiting
on interrupts from each other are serviced regularly, but large enough that
there's not too much context switching. please note that it's extremely
important that this slice is constant, to keep emulation deterministic
between runs.
return 0;
}
the next issue tackled by this loop is when dc_tick should be called to
execute this constant slice of time. the answer really depends on what
the goal of emulation is.
static void *emu_core_thread(void *data) {
struct emu *emu = data;
when the goal is to run completely unthrottled, it should be called as much
as possible, e.g.:
while (1) {
dc_tick(slice);
}
when the goal is to run at the same speed as the original dreamcast, the
answer is a bit more involved. at first it may seem desirable to use the
host machine's clock to schedule each slice, e.g.:
while (1) {
current_time = time();
delta_time = next_time - current_time;
if (delta_time < 0) {
dc_tick(slice);
next_time = current_time + delta_time + slice;
}
}
this will, in general, run the emulator at the same rate as the original
dreamcast. when performance hiccups, the host's time domain will move
forward, while the emulator's time domain will fall behind. the emulator
will then speed up temporarily due to the delta_time offset, eventually
synchronizing its view of time with the host as delta_time approaches 0.
the downside to this approach is that audio, and to some degree video, are
not presented well when performance hiccups. imagine a scenario where
performance grinds to a complete halt for 5 seconds. in this case, host
time is 5 seconds ahead of guest time, and the loop will run 5 seconds'
worth of emulator time in, say, 1 second of host time, again synchronizing
the time domains. the problem is that 5 seconds of audio and video have now
been generated for something the user experienced in only 1 second.
skipping video frames in this case isn't the worst experience, but
crackling and distorted audio can be awful. */
static const int64_t MACHINE_STEP = HZ_TO_NANO(1000);
int64_t current_time = time_nanoseconds();
int64_t next_time = current_time;
int64_t delta_time = 0;
int64_t current_time = 0;
int64_t next_pump_time = 0;
while (emu->running) {
current_time = time_nanoseconds();
if (OPTION_throttle) {
delta_time = current_time - next_time;
} else {
delta_time = 0;
while (audio_buffer_low(audio)) {
dc_tick(emu->dc, MACHINE_STEP);
}
if (delta_time >= 0) {
dc_tick(emu->dc, MACHINE_STEP);
next_time = current_time + MACHINE_STEP - delta_time;
/* audio events are just for device connections, check infrequently */
if (current_time > next_pump_time) {
audio_pump_events(audio);
next_pump_time = current_time + NS_PER_SEC;
}
prof_update(current_time);
}
audio_destroy(audio);
return 0;
}
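
For contrast with the audio-driven loop above, the clock-based throttling strategy discussed in the comment (and removed by this commit) can be fleshed out as a standalone sketch. host_time_ns() and run_slice() are stand-ins for time_nanoseconds() and dc_tick(), and clock_gettime assumes a POSIX host. After a stall, delta_time stays negative and the loop ticks back to back until guest time catches up, which is exactly the catch-up behavior whose audio artifacts the comment describes.

#include <stdint.h>
#include <time.h>

#define SLICE_NS INT64_C(1000000) /* 1 ms of guest time per slice */

/* stand-in for time_nanoseconds(); clock_gettime is POSIX */
static int64_t host_time_ns(void) {
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return (int64_t)ts.tv_sec * INT64_C(1000000000) + ts.tv_nsec;
}

/* stand-in for dc_tick(); would run every device for ns of guest time */
static void run_slice(int64_t ns) {
  (void)ns;
}

void throttled_loop(volatile int *running) {
  int64_t next_time = host_time_ns();

  while (*running) {
    int64_t current_time = host_time_ns();
    int64_t delta_time = next_time - current_time;

    /* once host time reaches next_time, run one slice; after a stall,
       delta_time stays negative and the loop ticks repeatedly until
       guest time catches back up with host time */
    if (delta_time < 0) {
      run_slice(SLICE_NS);
      next_time = current_time + delta_time + SLICE_NS;
    }
  }
}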
@ -244,7 +273,6 @@ void emu_run(struct emu *emu, const char *path) {
produces complete frames of decoded data, and the audio and video
thread are responsible for simply presenting the data */
thread_t core_thread = thread_create(&emu_core_thread, NULL, emu);
thread_t audio_thread = thread_create(&emu_audio_thread, NULL, emu);
while (emu->running) {
win_pump_events(emu->window);
@ -252,7 +280,6 @@ void emu_run(struct emu *emu, const char *path) {
/* wait for the core thread to exit */
void *result;
thread_join(audio_thread, &result);
thread_join(core_thread, &result);
}

View File

@ -391,6 +391,16 @@ static void aica_write_frames(struct aica *aica, const void *frames,
}
}
int aica_skip_frames(struct aica *aica, int num_frames) {
int available = ringbuf_available(aica->frames);
int size = MIN(available, num_frames * 4);
CHECK_EQ(size % 4, 0);
ringbuf_advance_read_ptr(aica->frames, size);
return size / 4;
}
int aica_read_frames(struct aica *aica, void *frames, int num_frames) {
int available = ringbuf_available(aica->frames);
int size = MIN(available, num_frames * 4);

View File

@ -14,6 +14,7 @@ struct aica *aica_create(struct dreamcast *dc);
void aica_destroy(struct aica *aica);
int aica_available_frames(struct aica *aica);
int aica_read_frames(struct aica *aica, void *buffer, int size);
int aica_skip_frames(struct aica *aica, int num_frames);
int aica_read_frames(struct aica *aica, void *buffer, int num_frames);
#endif
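
As a usage note for the interface above: a backend pulling from the aica combines these three calls roughly as the write callback earlier in the commit does. The sketch below restates that pattern in a self-contained form; the prototypes are repeated so it stands alone, fill_output and its parameters are hypothetical names, and each frame is assumed to be 4 bytes (two 16-bit channels), as the num_frames * 4 arithmetic in aica_skip_frames implies.

#include <stdint.h>
#include <string.h>

struct aica;

/* declarations mirroring the header above */
int aica_available_frames(struct aica *aica);
int aica_read_frames(struct aica *aica, void *buffer, int num_frames);
int aica_skip_frames(struct aica *aica, int num_frames);

/* fill out_frames stereo 16-bit frames into out, padding with silence when
   the ring buffer runs dry; *frames_silenced carries the silence debt
   across calls */
void fill_output(struct aica *aica, int16_t *out, int out_frames,
                 int *frames_silenced) {
  /* pay back silence written on a previous call by discarding the same
     number of incoming aica frames, keeping the guest and host audio time
     domains in sync */
  while (*frames_silenced) {
    int skipped = aica_skip_frames(aica, *frames_silenced);
    if (!skipped) {
      break;
    }
    *frames_silenced -= skipped;
  }

  int available = aica_available_frames(aica);
  int frame = 0;

  while (frame < out_frames) {
    uint32_t batch[10]; /* one uint32_t is one 4-byte stereo frame */
    int n = out_frames - frame;
    if (n > (int)(sizeof(batch) / sizeof(batch[0]))) {
      n = (int)(sizeof(batch) / sizeof(batch[0]));
    }

    if (available > 0) {
      /* batch read frames from the aica ring buffer */
      n = aica_read_frames(aica, batch, n);
      available -= n;
    } else {
      /* write out silence and remember how much was written */
      memset(batch, 0, sizeof(batch));
      *frames_silenced += n;
    }

    memcpy(&out[frame * 2], batch, (size_t)n * 4);
    frame += n;
  }
}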