Merge pull request #217 from DrChat/audio_decoding

Audio decoding
2015-05-22 23:08:48 -07:00 · 2015-05-22 23:08:48 -07:00 · 10afeaa27c
parent bd8db4810a 4f8d8e62b3
commit 10afeaa27c
7 changed files with 602 additions and 73 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -22,3 +22,6 @@
 [submodule "third_party/binutils-ppc-cygwin"]
 	path = third_party/binutils-ppc-cygwin
 	url = https://github.com/benvanik/binutils-ppc-cygwin
 [submodule "third_party/libav-bin"]
 	path = third_party/libav-bin
 	url = https://github.com/DrChat/xenia-libav-bin.git
--- a/libxenia.vcxproj
+++ b/libxenia.vcxproj
@ -16,6 +16,7 @@
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="src\xenia\apu\apu.cc" />
    <ClCompile Include="src\xenia\apu\audio_decoder.cc" />
    <ClCompile Include="src\xenia\apu\audio_driver.cc" />
    <ClCompile Include="src\xenia\apu\audio_system.cc" />
    <ClCompile Include="src\xenia\apu\nop\nop_apu.cc" />
@ -203,6 +204,7 @@
  <ItemGroup>
    <ClInclude Include="src\xenia\apu\apu-private.h" />
    <ClInclude Include="src\xenia\apu\apu.h" />
    <ClInclude Include="src\xenia\apu\audio_decoder.h" />
    <ClInclude Include="src\xenia\apu\audio_driver.h" />
    <ClInclude Include="src\xenia\apu\audio_system.h" />
    <ClInclude Include="src\xenia\apu\nop\nop_apu-private.h" />
@ -483,14 +485,15 @@
      <WarningLevel>Level3</WarningLevel>
      <Optimization>Disabled</Optimization>
      <PreprocessorDefinitions>BEA_ENGINE_STATIC=1;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>$(SolutionDir)\third_party\beaengine\include\;$(SolutionDir)\third_party\llvm\include\;$(SolutionDir)\third_party\gflags\src\;$(SolutionDir)\src\;$(SolutionDir)\third_party;$(SolutionDir)\</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>$(SolutionDir)\third_party\libav-bin\include\;$(SolutionDir)\third_party\beaengine\include\;$(SolutionDir)\third_party\llvm\include\;$(SolutionDir)\third_party\gflags\src\;$(SolutionDir)\src\;$(SolutionDir)\third_party;$(SolutionDir)\</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
    <Lib>
-      <AdditionalDependencies>libgflags.lib;libglew.lib</AdditionalDependencies>
+      <AdditionalDependencies>libavcodec.a;libavutil.a;libgflags.lib;libglew.lib</AdditionalDependencies>
      <AdditionalLibraryDirectories>$(SolutionDir)third_party\libav-bin\lib\$(Configuration);$(SolutionDir)build\bin\$(Configuration)\</AdditionalLibraryDirectories>
    </Lib>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Checked|x64'">
@ -500,14 +503,15 @@
      <WarningLevel>Level3</WarningLevel>
      <Optimization>Disabled</Optimization>
      <PreprocessorDefinitions>BEA_ENGINE_STATIC=1;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>$(SolutionDir)\third_party\beaengine\include\;$(SolutionDir)\third_party\llvm\include\;$(SolutionDir)\third_party\gflags\src\;$(SolutionDir)\src\;$(SolutionDir)\third_party;$(SolutionDir)\</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>$(SolutionDir)\third_party\libav-bin\include\;$(SolutionDir)\third_party\beaengine\include\;$(SolutionDir)\third_party\llvm\include\;$(SolutionDir)\third_party\gflags\src\;$(SolutionDir)\src\;$(SolutionDir)\third_party;$(SolutionDir)\</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
    <Lib>
-      <AdditionalDependencies>libgflags.lib;libglew.lib</AdditionalDependencies>
+      <AdditionalDependencies>libavcodec.a;libavutil.a;libgflags.lib;libglew.lib</AdditionalDependencies>
      <AdditionalLibraryDirectories>$(SolutionDir)third_party\libav-bin\lib\$(Configuration);$(SolutionDir)build\bin\$(Configuration)\</AdditionalLibraryDirectories>
    </Lib>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@ -519,7 +523,7 @@
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <PreprocessorDefinitions>BEA_ENGINE_STATIC=1;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>$(SolutionDir)\third_party\beaengine\include\;$(SolutionDir)\third_party\llvm\include\;$(SolutionDir)\third_party\gflags\src\;$(SolutionDir)\src\;$(SolutionDir)\third_party;$(SolutionDir)\</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>$(SolutionDir)\third_party\libav-bin\include\;$(SolutionDir)\third_party\beaengine\include\;$(SolutionDir)\third_party\llvm\include\;$(SolutionDir)\third_party\gflags\src\;$(SolutionDir)\src\;$(SolutionDir)\third_party;$(SolutionDir)\</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
@ -528,7 +532,8 @@
      <OptimizeReferences>true</OptimizeReferences>
    </Link>
    <Lib>
-      <AdditionalDependencies>libgflags.lib;libglew.lib</AdditionalDependencies>
+      <AdditionalDependencies>libavcodec.a;libavutil.a;libgflags.lib;libglew.lib</AdditionalDependencies>
      <AdditionalLibraryDirectories>$(SolutionDir)third_party\libav-bin\lib\$(Configuration);$(SolutionDir)build\bin\$(Configuration)\</AdditionalLibraryDirectories>
    </Lib>
  </ItemDefinitionGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
--- a/libxenia.vcxproj.filters
+++ b/libxenia.vcxproj.filters
@ -700,9 +700,15 @@
    <ClCompile Include="src\xenia\kernel\xboxkrnl_error.cc">
      <Filter>src\xenia\kernel</Filter>
    </ClCompile>
    <ClCompile Include="src\xenia\apu\audio_decoder.cc">
      <Filter>src\xenia\apu</Filter>
    </ClCompile>
    <ClCompile Include="src\xenia\cpu\backend\x64\x64_code_cache.cc">
      <Filter>src\xenia\cpu\backend\x64</Filter>
    </ClCompile>
    <ClCompile Include="src\xenia\apu\audio_decoder.cc">
      <Filter>src\xenia\apu</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="src\xenia\emulator.h">
@ -1341,6 +1347,9 @@
    <ClInclude Include="src\xenia\kernel\xboxkrnl_error.h">
      <Filter>src\xenia\kernel</Filter>
    </ClInclude>
    <ClInclude Include="src\xenia\apu\audio_decoder.h">
      <Filter>src\xenia\apu</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <None Include="src\xenia\cpu\backend\x64\x64_sequence.inl">
--- a/src/xenia/apu/audio_decoder.cc
+++ b/src/xenia/apu/audio_decoder.cc
@ -0,0 +1,239 @@
 /**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2015 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
 #include "xenia/apu/audio_decoder.h"
 #include "xenia/apu/audio_system.h"
 #include "xenia/base/logging.h"
 extern "C" {
  #include "libavcodec/avcodec.h"
 }
 // Credit for most of this code goes to:
 // https://github.com/koolkdev/libertyv/blob/master/libav_wrapper/xma2dec.c
 namespace xe {
 namespace apu {
 // Constructs an idle decoder. Every scalar and pointer member gets a defined
 // value (listed in declaration order) so the destructor never inspects an
 // indeterminate pointer when Initialize() was not called or returned early.
 // packet_data_ is left untouched; PreparePacket() overwrites it before use.
 AudioDecoder::AudioDecoder()
     : codec_(nullptr),
       context_(nullptr),
       decoded_frame_(nullptr),
       packet_(nullptr),
       bits_per_frame_(0),
       bits_(0),
       current_frame_pos_(0),
       current_frame_(nullptr),
       frame_samples_size_(0),
       offset_(0) {}
 // Releases everything Initialize() acquired: the codec context (and the
 // extradata attached to it), the scratch frame, the packet wrapper and the
 // converted-sample buffer.
 AudioDecoder::~AudioDecoder() {
  if (context_) {
    // extradata comes from new[] in Initialize(), so it needs delete[].
    // Clear the fields afterwards so nothing can see the stale pointer.
    delete[] context_->extradata;
    context_->extradata = nullptr;
    context_->extradata_size = 0;
    if (avcodec_is_open(context_)) {
      avcodec_close(context_);
    }
    av_free(context_);
  }
  if (decoded_frame_) {
    av_frame_free(&decoded_frame_);
  }
  // The AVPacket struct itself is heap-allocated in Initialize(). Its data
  // pointer only ever refers to packet_data_ (a member), so there is no
  // payload to free, just the wrapper.
  delete packet_;
  // Allocated with new[] in Initialize(); deleting nullptr is a no-op.
  delete[] current_frame_;
 }
 // Allocates the libav objects and working buffers used by this decoder.
 //
 // bits: output bits per sample; must be a positive multiple of 8, at most 32.
 // Returns 0 on success and 1 on failure (invalid bits, or a libav lookup /
 // allocation returned null). The codec context is intentionally NOT opened
 // here; PreparePacket() opens it once the sample rate and channel count of
 // the stream are known.
 int AudioDecoder::Initialize(int bits) {
  static bool avcodec_initialized = false;
  if (!avcodec_initialized) {
    avcodec_register_all();
    avcodec_initialized = true;
  }

  if (bits <= 0 || bits > 32 || (bits % 8) != 0) {
    assert_always();
    // Do not carry on with an unusable sample width when the assert is
    // compiled out or continued past.
    return 1;
  }

  // Output bits per sample
  bits_ = bits;

  // Allocate important stuff
  codec_ = avcodec_find_decoder(AV_CODEC_ID_WMAPRO);
  if (!codec_) {
    return 1;
  }

  context_ = avcodec_alloc_context3(codec_);
  if (!context_) {
    return 1;
  }

  decoded_frame_ = av_frame_alloc();
  if (!decoded_frame_) {
    return 1;
  }

  packet_ = new AVPacket();
  av_init_packet(packet_);

  // Initialize these to 0. They'll actually be set later.
  context_->channels = 0;
  context_->sample_rate = 0;
  context_->block_align = XMAContextData::kBytesPerBlock;

  // Extra data passed to the decoder. The buffer is zero-filled so the bytes
  // that are not explicitly written below have a defined value.
  const int kExtraDataSize = 18;
  context_->extradata_size = kExtraDataSize;
  context_->extradata = new uint8_t[kExtraDataSize]();

  // Fields are copied in with memcpy because offsets 2 and 14 are not
  // suitably aligned for direct int/short stores. Values are written in host
  // byte order.
  const uint16_t bits_per_sample = 0x10;
  const uint32_t channel_mask = 1;
  const uint16_t decode_flags = 0x10D6;
  std::memcpy(context_->extradata, &bits_per_sample, sizeof(bits_per_sample));
  std::memcpy(context_->extradata + 2, &channel_mask, sizeof(channel_mask));
  std::memcpy(context_->extradata + 14, &decode_flags, sizeof(decode_flags));

  // Holds one converted frame:
  // samples per frame * 2 max channels * output bytes per sample
  current_frame_ = new uint8_t[XMAContextData::kSamplesPerFrame * 2 * (bits / 8)];
  current_frame_pos_ = 0;
  frame_samples_size_ = 0;

  // FYI: We're purposely not opening the context here. That is done later.
  return 0;
 }
 // Stages one XMA block for decoding.
 //
 // input:       pointer to the block; `size` bytes are copied from it.
 // size:        must equal XMAContextData::kBytesPerBlock.
 // sample_rate: stream sample rate in Hz.
 // channels:    stream channel count.
 //
 // Returns 0 when the packet is staged. Returns 1 when the size is wrong, when
 // a previous packet or decoded frame has not been fully consumed yet, or when
 // the codec could not be (re)opened for the requested format.
 int AudioDecoder::PreparePacket(uint8_t* input, size_t size,
                                int sample_rate, int channels) {
  if (size != XMAContextData::kBytesPerBlock) {
    // Invalid packet size!
    assert_always();
    return 1;
  }

  if (packet_->size > 0 || current_frame_pos_ != frame_samples_size_) {
    // Haven't finished parsing another packet
    return 1;
  }

  std::memcpy(packet_data_, input, size);

  // Modify the packet header so it's WMAPro compatible. The first word is
  // read and written through memcpy rather than a pointer cast; the bit
  // manipulation itself is unchanged.
  uint32_t header = 0;
  std::memcpy(&header, packet_data_, sizeof(header));
  header = static_cast<uint32_t>(((offset_ & 0x7800) | 0x400) >> 7) |
           (header & 0xFFFEFF08);
  std::memcpy(packet_data_, &header, sizeof(header));
  offset_ += XMAContextData::kBytesPerBlock; // Sequence number

  packet_->data = packet_data_;
  packet_->size = XMAContextData::kBytesPerBlock;

  // Re-initialize the context with new sample rate and channels
  if (context_->sample_rate != sample_rate || context_->channels != channels) {
    context_->sample_rate = sample_rate;
    context_->channels = channels;

    // We have to reopen the codec so it'll realloc whatever data it needs.
    // TODO: Find a better way.
    avcodec_close(context_);
    if (avcodec_open2(context_, codec_, nullptr) < 0) {
      XELOGE("Audio Decoder: Failed to reopen context.");
      // Forget the requested format so the next call tries to open the codec
      // again, and drop the staged packet so DecodePacket() is never run
      // against a closed codec.
      context_->sample_rate = 0;
      context_->channels = 0;
      packet_->data = nullptr;
      packet_->size = 0;
      return 1;
    }
  }

  return 0;
 }
 // Drops whatever is still pending: the unconsumed remainder of the staged
 // packet and any decoded samples not yet copied out. Does nothing when the
 // decoder is already idle.
 void AudioDecoder::DiscardPacket() {
  const bool packet_pending = packet_->size > 0;
  const bool frame_pending = current_frame_pos_ != frame_samples_size_;
  if (!packet_pending && !frame_pending) {
    return;
  }

  packet_->data = nullptr;
  packet_->size = 0;
  // Marking the read position as "at the end" makes the held frame look
  // fully consumed.
  current_frame_pos_ = frame_samples_size_;
 }
 // Decodes from the staged packet and copies converted PCM into the caller's
 // buffer.
 //
 // output:        destination buffer.
 // output_offset: byte offset into `output` at which writing starts.
 // output_size:   maximum number of bytes to write.
 //
 // Returns the number of bytes written (0 means nothing is left to decode and
 // a new packet must be staged with PreparePacket()), or a negative value on
 // error: the libav error code, -2 if the frame has too many samples, -3 if
 // the sample format is not planar float, -4 if the frame size is
 // inconsistent, -5 if the channel count does not fit the frame buffer.
 //
 // Samples are written as little-endian signed integers of bits_ width,
 // interleaved by channel.
 int AudioDecoder::DecodePacket(uint8_t* output, size_t output_offset, size_t output_size) {
  size_t to_copy = 0;
  const size_t original_offset = output_offset;
  const uint32_t sample_size = bits_ / 8;

  // We're holding onto an already-decoded frame. Copy it out.
  if (current_frame_pos_ != frame_samples_size_) {
    to_copy = std::min(output_size,
                       static_cast<size_t>(frame_samples_size_) - current_frame_pos_);
    std::memcpy(output + output_offset, current_frame_ + current_frame_pos_, to_copy);

    current_frame_pos_ += to_copy;
    output_size -= to_copy;
    output_offset += to_copy;
  }

  while (output_size > 0 && packet_->size > 0) {
    int got_frame = 0;

    // Decode the current frame
    int len = avcodec_decode_audio4(context_, decoded_frame_, &got_frame, packet_);
    if (len < 0) {
      // Error in codec (bad sample rate or something)
      return len;
    }

    // Offset by decoded length
    packet_->size -= len;
    packet_->data += len;
    packet_->dts = packet_->pts = AV_NOPTS_VALUE;

    // Successfully decoded a frame
    if (got_frame) {
      if (decoded_frame_->nb_samples > XMAContextData::kSamplesPerFrame) {
        return -2;
      } else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) {
        return -3;
      }

      // Check the returned buffer size
      if (av_samples_get_buffer_size(NULL, context_->channels,
                                     decoded_frame_->nb_samples,
                                     context_->sample_fmt, 1)
          != context_->channels * decoded_frame_->nb_samples * sizeof(float)) {
        return -4;
      }

      // current_frame_ is sized for at most two channels (see Initialize()).
      const int channels = context_->channels;
      if (channels < 1 || channels > 2) {
        return -5;
      }
      const int samples = decoded_frame_->nb_samples;

      // Full-scale magnitude for the output width. Computed unsigned and in
      // double so that a 32-bit output width neither overflows the shift nor
      // loses the (scale - 1) upper bound to float rounding.
      const double scale = static_cast<double>(1u << (bits_ - 1));
      const double max_value = scale - 1.0;
      const double min_value = -scale;

      // The frame is planar: data[ch] holds all samples of channel ch. Every
      // channel is converted and interleaved, so that the whole
      // frame_samples_size_ region computed below holds real data.
      for (int ch = 0; ch < channels; ch++) {
        const float* plane = reinterpret_cast<const float*>(decoded_frame_->data[ch]);
        for (int i = 0; i < samples; i++) {
          double scaled = plane[i] * scale;

          // Weird problem: Sometimes the samples are outside [-1,1]
          // Round to nearest, then clamp both ends to the output range.
          if (scaled >= 0) {
            scaled += 0.5;
            if (scaled > max_value) {
              scaled = max_value;
            }
          } else {
            scaled -= 0.5;
            if (scaled < min_value) {
              scaled = min_value;
            }
          }

          // Emit the low sample_size bytes, least significant first.
          uint32_t raw = static_cast<uint32_t>(static_cast<int32_t>(scaled));
          uint8_t* dest = current_frame_ +
              (static_cast<size_t>(i) * channels + ch) * sample_size;
          for (uint32_t j = 0; j < sample_size; j++) {
            dest[j] = raw & 0xFF;
            raw >>= 8;
          }
        }
      }
      current_frame_pos_ = 0;

      // Total size of the frame's samples
      frame_samples_size_ = channels * samples * sample_size;

      to_copy = std::min(output_size, (size_t)(frame_samples_size_));
      std::memcpy(output + output_offset, current_frame_, to_copy);

      current_frame_pos_ += to_copy;
      output_size -= to_copy;
      output_offset += to_copy;
    }
  }

  // Return number of bytes written (typically 2048)
  return (int)(output_offset - original_offset);
 }
 } // namespace apu
 } // namespace xe
--- a/src/xenia/apu/audio_decoder.h
+++ b/src/xenia/apu/audio_decoder.h
@ -0,0 +1,66 @@
 /**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2015 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
 #ifndef XENIA_APU_AUDIO_DECODER_H_
 #define XENIA_APU_AUDIO_DECODER_H_
 #include "xenia/apu/audio_system.h"
 // XMA audio format:
 // From research, XMA appears to be based on WMA Pro with
 // a few (very slight) modifications.
 // XMA2 is fully backwards-compatible with XMA1.
 // Helpful resources:
 // https://github.com/koolkdev/libertyv/blob/master/libav_wrapper/xma2dec.c
 // http://hcs64.com/mboard/forum.php?showthread=14818
 // https://github.com/hrydgard/minidx9/blob/master/Include/xma2defs.h
 // Forward declarations
 struct AVCodec;
 struct AVCodecContext;
 struct AVFrame;
 struct AVPacket;
 namespace xe {
 namespace apu {
 // Decodes XMA blocks by patching each block header and feeding the block to
 // libav's WMA Pro decoder, then converting the decoded float samples to
 // integer PCM. One instance keeps decoder state across calls.
 //
 // Typical use: Initialize() once, then repeatedly PreparePacket() followed by
 // DecodePacket() until it returns 0; DiscardPacket() abandons pending data.
 class AudioDecoder {
  public:
    AudioDecoder();
    ~AudioDecoder();

    // The decoder owns raw libav handles and heap buffers that the destructor
    // frees; a copy would release them twice, so copying is disabled.
    AudioDecoder(const AudioDecoder&) = delete;
    AudioDecoder& operator=(const AudioDecoder&) = delete;

    // Allocates decoder state. `bits` is the output bits per sample.
    // Returns 0 on success, 1 on failure.
    int Initialize(int bits);

    // Stages one block of `size` bytes (must be
    // XMAContextData::kBytesPerBlock) for decoding with the given sample rate
    // and channel count. Returns 0 on success, 1 on failure or if earlier
    // data is still pending.
    int PreparePacket(uint8_t* input, size_t size, int sample_rate, int channels);

    // Drops the staged packet and any decoded samples not yet copied out.
    void DiscardPacket();

    // Writes up to `size` bytes of decoded PCM to `output + offset`.
    // Returns the number of bytes written, or a negative value on error.
    int DecodePacket(uint8_t* output, size_t offset, size_t size);

  private:
    AVCodec* codec_;             // Decoder implementation looked up in Initialize().
    AVCodecContext* context_;    // Codec context; opened lazily by PreparePacket().
    AVFrame* decoded_frame_;     // Scratch frame that libav decodes into.
    AVPacket* packet_;           // Staged input; its data points into packet_data_.

    uint8_t bits_per_frame_;     // NOTE(review): not referenced by audio_decoder.cc.
    uint32_t bits_;              // Output bits per sample.
    size_t current_frame_pos_;   // Bytes of current_frame_ already copied out.
    uint8_t* current_frame_;     // Converted PCM for the most recent frame.
    uint32_t frame_samples_size_; // Valid bytes in current_frame_.
    int offset_;                 // Running byte count used to patch packet headers.

    uint8_t packet_data_[XMAContextData::kBytesPerBlock]; // Copy of the staged block.
 };
 } // namespace apu
 } // namespace xe
 #endif  // XENIA_APU_AUDIO_DECODER_H_
--- a/src/xenia/apu/audio_system.cc
+++ b/src/xenia/apu/audio_system.cc
@ -10,6 +10,7 @@
 #include "xenia/apu/audio_system.h"
 #include "xenia/apu/audio_driver.h"
 #include "xenia/apu/audio_decoder.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/math.h"
 #include "xenia/cpu/processor.h"
@ -55,7 +56,8 @@ const uint32_t kXmaContextSize = 64;
 const uint32_t kXmaContextCount = 320;
 AudioSystem::AudioSystem(Emulator* emulator)
-    : emulator_(emulator), memory_(emulator->memory()), worker_running_(false) {
+    : emulator_(emulator), memory_(emulator->memory()), worker_running_(false),
    decoder_running_(false) {
  memset(clients_, 0, sizeof(clients_));
  for (size_t i = 0; i < maximum_client_count_; ++i) {
    unused_clients_.push(i);
@ -85,11 +87,22 @@ X_STATUS AudioSystem::Setup() {
      kXmaContextSize * kXmaContextCount, 256, kSystemHeapPhysical);
  // Add all contexts to the free list.
  for (int i = kXmaContextCount - 1; i >= 0; --i) {
-    xma_context_free_list_.push_back(registers_.xma_context_array_ptr +
+    uint32_t ptr = registers_.xma_context_array_ptr + i * kXmaContextSize;
-                                     i * kXmaContextSize);
+
    // Initialize it
    xma_context_array_[i].guest_ptr = ptr;
    xma_context_array_[i].in_use = false;
    // Create a new decoder per context
    // Needed because some data needs to be persisted across calls
    // TODO: Need to destroy this on class destruction
    xma_context_array_[i].decoder = new AudioDecoder();
    xma_context_array_[i].decoder->Initialize(16);
  }
  registers_.next_context = 1;
  // Threads
  worker_running_ = true;
  worker_thread_ = new kernel::XHostThread(emulator()->kernel_state(),
                                           128 * 1024, 0, [this]() {
@ -98,6 +111,9 @@ X_STATUS AudioSystem::Setup() {
  });
  worker_thread_->Create();
  decoder_running_ = true;
  decoder_thread_ = std::thread(std::bind(&AudioSystem::DecoderThreadMain, this));
  return X_STATUS_SUCCESS;
 }
@ -128,6 +144,7 @@ void AudioSystem::WorkerThreadMain() {
        uint32_t client_callback = clients_[index].callback;
        uint32_t client_callback_arg = clients_[index].wrapped_callback_arg;
        lock_.unlock();
        if (client_callback) {
          uint64_t args[] = {client_callback_arg};
          processor->Execute(worker_thread_->thread_state(), client_callback,
@ -154,6 +171,156 @@ void AudioSystem::WorkerThreadMain() {
  // TODO(benvanik): call module API to kill?
 }
 // Entry point of the "Audio Decoder" thread.
 //
 // Loops while decoder_running_ is set: waits on decoder_fence_, then walks
 // every XMA context. For each in-use context whose guest-side data marks an
 // input buffer as valid, it clears the valid flags and repeatedly decodes
 // from input buffer 0 into the output buffer until either the input is
 // consumed or the output buffer is full, then stores the updated context data
 // back to guest memory. Contexts whose lock is held elsewhere are skipped for
 // this pass rather than waited on.
 void AudioSystem::DecoderThreadMain() {
  xe::threading::set_name("Audio Decoder");
  xe::Profiler::ThreadEnter("Audio Decoder");
  while (decoder_running_) {
    // Wait for the fence
    // FIXME: This actually does nothing once signaled once
    decoder_fence_.Wait();
    // Check to see if we're supposed to exit
    if (!decoder_running_) {
      break;
    }
    // Okay, let's loop through XMA contexts to find ones we need to decode!
    for (uint32_t n = 0; n < kXmaContextCount; n++) {
      XMAContext& context = xma_context_array_[n];
      if (!context.lock.try_lock()) {
        // Someone else has the lock.
        continue;
      }
      // Skip unused contexts
      if (!context.in_use) {
        context.lock.unlock();
        continue;
      }
      // Work on a host-side copy of the guest context; it is written back
      // with data.Store(ptr) once decoding for this context is done.
      uint8_t* ptr = memory()->TranslatePhysical(context.guest_ptr);
      auto data = XMAContextData(ptr);
      if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
        // A buffer is valid. Run the decoder!
        // Reset valid flags
        data.input_buffer_0_valid = 0;
        data.input_buffer_1_valid = 0;
        data.output_buffer_valid = 0;
        // Translate pointers for future use.
        auto in0 = memory()->TranslatePhysical(data.input_buffer_0_ptr);
        auto in1 = memory()->TranslatePhysical(data.input_buffer_1_ptr);
        auto out = memory()->TranslatePhysical(data.output_buffer_ptr);
        // I haven't seen this be used yet.
        assert(!data.input_buffer_1_block_count);
        // What I see:
        // XMA outputs 2 bytes per sample
        // 512 samples per frame (128 per subframe)
        // Max output size is data.output_buffer_block_count * 256
        // This decoder is fed packets (max 4095 per buffer)
        // Packets contain "some" frames
        // 32bit header (big endian)
        // Frames are the smallest thing the SPUs can decode.
        // They usually can span packets (libav handles this)
        // Sample rates (data.sample_rate):
        // 0 - 24 kHz ?
        // 1 - 32 kHz
        // 2 - 44.1 kHz ?
        // 3 - 48 kHz ?
        // SPUs also support stereo decoding. (data.is_stereo)
        while (true) {
          // Initial check - see if we've finished with the input
          // TODO - Probably need to move this, I think it might skip the very
          // last packet (see the call to PreparePacket)
          // NOTE(review): input_buffer_read_offset is treated as a bit offset
          // that includes a 4-byte header; if it is ever below 32 the
          // subtraction wraps (size_t). Confirm the guest always sets >= 32.
          size_t input_size = (data.input_buffer_0_block_count +
                              data.input_buffer_1_block_count) * 2048;
          size_t input_offset = (data.input_buffer_read_offset / 8 - 4);
          size_t input_remaining = input_size - input_offset;
          if (input_remaining == 0) {
            // We're finished!
            break;
          }
          // Now check the output buffer.
          size_t output_size = data.output_buffer_block_count * 256;
          size_t output_offset = data.output_buffer_write_offset * 256;
          size_t output_remaining = output_size - output_offset;
          if (output_remaining == 0) {
            // Can't write any more data. Break.
            // The game will kick us again with a new output buffer later.
            break;
          }
          // This'll copy audio samples into the output buffer.
          // The samples need to be 2 bytes long!
          // Copies one frame at a time, so keep calling this until size == 0
          int read = context.decoder->DecodePacket(out, output_offset,
                                                   output_remaining);
          if (read < 0) {
            XELOGAPU("APU failed to decode packet (returned %.8X)", -read);
            context.decoder->DiscardPacket();
            // TODO: Set error state
            break;
          }
          // Zero bytes written means the decoder has nothing staged, so feed
          // it the next 2048-byte packet from the input buffer.
          if (read == 0) {
            // Select sample rate.
            int sample_rate = 0;
            if (data.sample_rate == 0) {
              // TODO: Test this
              sample_rate = 24000;
            } else if (data.sample_rate == 1) {
              sample_rate = 32000;
            } else if (data.sample_rate == 2) {
              // TODO: Test this
              sample_rate = 44100;
            } else if (data.sample_rate == 3) {
              // TODO: Test this
              sample_rate = 48000;
            }
            // Channels
            int channels = 1;
            if (data.is_stereo == 1) {
              channels = 2;
            }
            // New packet time.
            // TODO: Select input buffer 1 if necessary.
            // NOTE(review): PreparePacket's return value is ignored, so the
            // read offset advances even when the packet was rejected.
            auto packet = in0 + input_offset;
            context.decoder->PreparePacket(packet, 2048, sample_rate, channels);
            input_offset += 2048;
          }
          output_offset += read;
          // blah copy these back to the context
          // The write offset is stored in 256-byte blocks, so any partial
          // block is truncated by the division.
          data.input_buffer_read_offset = (input_offset + 4) * 8;
          data.output_buffer_write_offset = output_offset / 256;
        }
        data.Store(ptr);
      }
      context.lock.unlock();
    }
  }
  xe::Profiler::ThreadExit();
 }
 // Intentionally empty: this function performs no work.
 void AudioSystem::Initialize() {}
 void AudioSystem::Shutdown() {
@ -162,30 +329,45 @@ void AudioSystem::Shutdown() {
  worker_thread_->Wait(0, 0, 0, nullptr);
  worker_thread_->Release();
  decoder_running_ = false;
  decoder_fence_.Signal();
  memory()->SystemHeapFree(registers_.xma_context_array_ptr);
 }
 uint32_t AudioSystem::AllocateXmaContext() {
  std::lock_guard<std::mutex> lock(lock_);
-  if (xma_context_free_list_.empty()) {
+
-    // No contexts available.
+  for (uint32_t n = 0; n < kXmaContextCount; n++) {
-    return 0;
+    XMAContext& context = xma_context_array_[n];
    if (!context.in_use) {
      context.in_use = true;
      return context.guest_ptr;
    }
  }
-  auto guest_ptr = xma_context_free_list_.back();
+  return 0;
  xma_context_free_list_.pop_back();
  auto context_ptr = memory()->TranslateVirtual(guest_ptr);
  return guest_ptr;
 }
 void AudioSystem::ReleaseXmaContext(uint32_t guest_ptr) {
  std::lock_guard<std::mutex> lock(lock_);
-  auto context_ptr = memory()->TranslateVirtual(guest_ptr);
+  // Find it in the list.
-  std::memset(context_ptr, 0, kXmaContextSize);
+  for (uint32_t n = 0; n < kXmaContextCount; n++) {
    XMAContext& context = xma_context_array_[n];
    if (context.guest_ptr == guest_ptr) {
      // Found it!
      // Lock it in case the decoder thread is working on it now
      context.lock.lock();
-  xma_context_free_list_.push_back(guest_ptr);
+      context.in_use = false;
      auto context_ptr = memory()->TranslateVirtual(guest_ptr);
      std::memset(context_ptr, 0, kXmaContextSize); // Zero it.
      context.decoder->DiscardPacket();
      context.lock.unlock();
    }
  }
 }
 X_STATUS AudioSystem::RegisterClient(uint32_t callback, uint32_t callback_arg,
@ -280,42 +462,49 @@ void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) {
  if (r >= 0x1940 && r <= 0x1940 + 9 * 4) {
    // Context kick command.
    // This will kick off the given hardware contexts.
    // Basically, this kicks the SPU and says "hey, decode that audio!"
    // XMAEnableContext
    // The context ID is a bit in the range of the entire context array
    for (int i = 0; value && i < 32; ++i) {
      if (value & 1) {
        uint32_t context_id = i + (r - 0x1940) / 4 * 32;
-        XELOGAPU("AudioSystem: kicking context %d", context_id);
+        XMAContext& context = xma_context_array_[context_id];
-        // Games check bits 20/21 of context[0].
+
-        // If both bits are set buffer full, otherwise room available.
+        context.lock.lock();
-        // Right after a kick we always set buffers to invalid so games keep
+        auto context_ptr = memory()->TranslateVirtual(context.guest_ptr);
        // feeding data.
        uint32_t guest_ptr =
            registers_.xma_context_array_ptr + context_id * kXmaContextSize;
        auto context_ptr = memory()->TranslateVirtual(guest_ptr);
        XMAContextData data(context_ptr);
-        bool has_valid_input = data.input_buffer_0_valid ||
+        XELOGAPU("AudioSystem: kicking context %d (%d/%d bytes)", context_id,
-                               data.input_buffer_1_valid;
+                 data.input_buffer_read_offset, data.input_buffer_0_block_count
-        if (has_valid_input) {
+                 * XMAContextData::kBytesPerBlock);
          // Invalidate the buffers.
          data.input_buffer_0_valid = 0;
          data.input_buffer_1_valid = 0;
-          // Set output buffer to invalid.
+        // Reset valid flags so our audio decoder knows to process this one
-          data.output_buffer_valid = false;
+        data.input_buffer_0_valid = data.input_buffer_0_ptr != 0;
-        }
+        data.input_buffer_1_valid = data.input_buffer_1_ptr != 0;
        data.output_buffer_write_offset = 0;
        data.Store(context_ptr);
        context.lock.unlock();
        // Signal the decoder thread
        decoder_fence_.Signal();
      }
      value >>= 1;
    }
  } else if (r >= 0x1A40 && r <= 0x1A40 + 9 * 4) {
    // Context lock command.
    // This requests a lock by flagging the context.
    // XMADisableContext
    for (int i = 0; value && i < 32; ++i) {
      if (value & 1) {
        uint32_t context_id = i + (r - 0x1A40) / 4 * 32;
        XELOGAPU("AudioSystem: set context lock %d", context_id);
-        // TODO(benvanik): set lock?
+
        // TODO: Find the correct way to lock/unlock this.
        // I thought we could lock it here, unlock it in the kick but that
        // doesn't seem to work
        XMAContext& context = xma_context_array_[context_id];
      }
      value >>= 1;
    }
@ -326,15 +515,11 @@ void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) {
      if (value & 1) {
        uint32_t context_id = i + (r - 0x1A80) / 4 * 32;
        XELOGAPU("AudioSystem: reset context %d", context_id);
        // TODO(benvanik): something?
        uint32_t guest_ptr =
            registers_.xma_context_array_ptr + context_id * kXmaContextSize;
        auto context_ptr = memory()->TranslateVirtual(guest_ptr);
        XMAContextData data(context_ptr);
        data.Store(context_ptr);
      }
      value >>= 1;
    }
--- a/src/xenia/apu/audio_system.h
+++ b/src/xenia/apu/audio_system.h
@ -27,10 +27,16 @@ namespace xe {
 namespace apu {
 class AudioDriver;
 class AudioDecoder;
 // This is stored in guest space in big-endian order.
 // We load and swap the whole thing to splat here so that we can
 // use bitfields.
 // This could be important:
 // http://www.fmod.org/questions/question/forum-15859
 // Appears to be dumped in order (for the most part)
 // http://pastebin.com/9amqJ2kQ
 struct XMAContextData {
  static const uint32_t kSize = 64;
  static const uint32_t kBytesPerBlock = 2048;
@ -39,53 +45,54 @@ struct XMAContextData {
  // DWORD 0
  uint32_t input_buffer_0_block_count : 12;  // XMASetInputBuffer0, number of
-                                             // 2KB blocks.
+                                             // 2KB blocks. AKA SizeRead0
-  uint32_t loop_count : 8;                   // +12bit, XMASetLoopData
+                                             // Maximum 4095 packets.
  uint32_t loop_count : 8;                   // +12bit, XMASetLoopData NumLoops
  uint32_t input_buffer_0_valid : 1;         // +20bit, XMAIsInputBuffer0Valid
  uint32_t input_buffer_1_valid : 1;         // +21bit, XMAIsInputBuffer1Valid
-  uint32_t output_buffer_block_count : 5;    // +22bit
+  uint32_t output_buffer_block_count : 5;    // +22bit SizeWrite 256byte blocks
-  uint32_t
+  uint32_t output_buffer_write_offset : 5;   // +27bit, XMAGetOutputBufferWriteOffset
-    output_buffer_write_offset : 5;  // +27bit, XMAGetOutputBufferWriteOffset
+                                             // AKA OffsetWrite
-                                     // DWORD 1
+  // DWORD 1
  uint32_t input_buffer_1_block_count : 12;  // XMASetInputBuffer1, number of
                                             // 2KB blocks.
  uint32_t loop_subframe_end : 2;            // +12bit, XMASetLoopData
-  uint32_t unk_dword_1_a : 3;                // ?
+  uint32_t unk_dword_1_a : 3;                // ? might be loop_subframe_skip
-  uint32_t loop_subframe_skip : 3;           // +17bit, XMASetLoopData
+  uint32_t loop_subframe_skip : 3;           // +17bit, XMASetLoopData might be subframe_decode_count
-  uint32_t subframe_decode_count : 4;        // +20bit
+  uint32_t subframe_decode_count : 4;        // +20bit might be subframe_skip_count
-  uint32_t unk_dword_1_b : 3;                // ?
+  uint32_t unk_dword_1_b : 3;                // ? NumSubframesToSkip/NumChannels(?)
-  uint32_t sample_rate : 2;                  // +27bit
+  uint32_t sample_rate : 2;                  // +27bit enum of sample rates
-  uint32_t is_stereo : 1;                    // +29bit
+  uint32_t is_stereo : 1;                    // +29bit might be NumChannels
-  uint32_t unk_dword_1_c : 1;                // ?
+  uint32_t unk_dword_1_c : 1;                // ? part of NumChannels?
  uint32_t output_buffer_valid : 1;          // +31bit, XMAIsOutputBufferValid
-                                             // DWORD 2
+  // DWORD 2
-  uint32_t input_buffer_read_offset : 30;  // XMAGetInputBufferReadOffset
+  uint32_t input_buffer_read_offset : 30;   // XMAGetInputBufferReadOffset
-  uint32_t unk_dword_2 : 2;                // ?
+  uint32_t unk_dword_2 : 2;                 // ErrorStatus/ErrorSet (?)
-                                           // DWORD 3
+  // DWORD 3
-  uint32_t loop_start : 26;  // XMASetLoopData
+  uint32_t loop_start : 26;  // XMASetLoopData LoopStartOffset
-  uint32_t unk_dword_3 : 6;  // ?
+  uint32_t unk_dword_3 : 6;  // ? ParserErrorStatus/ParserErrorSet(?)
-                             // DWORD 4
+  // DWORD 4
-  uint32_t loop_end : 26;        // XMASetLoopData
+  uint32_t loop_end : 26;        // XMASetLoopData LoopEndOffset
  uint32_t packet_metadata : 5;  // XMAGetPacketMetadata
  uint32_t current_buffer : 1;   // ?
-                                // DWORD 5
+  // DWORD 5
  uint32_t input_buffer_0_ptr;  // physical address
-                                // DWORD 6
+  // DWORD 6
  uint32_t input_buffer_1_ptr;  // physical address
-                                // DWORD 7
+  // DWORD 7
  uint32_t output_buffer_ptr;   // physical address
-                                // DWORD 8
+  // DWORD 8
-  uint32_t unk_dword_8;         // Some kind of pointer like output_buffer_ptr
+  uint32_t overlap_add_ptr;     // PtrOverlapAdd(?)
-                                // DWORD 9
+  // DWORD 9
-  uint32_t
+  // +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead
-    output_buffer_read_offset : 5;  // +0bit, XMAGetOutputBufferReadOffset
+  uint32_t output_buffer_read_offset : 5;  
-  uint32_t unk_dword_9 : 27;
+  uint32_t unk_dword_9 : 27; // StopWhenDone/InterruptWhenDone(?)
  XMAContextData(const void* ptr) {
    xe::copy_and_swap_32_aligned(reinterpret_cast<uint32_t*>(this),
@ -135,6 +142,7 @@ class AudioSystem {
 private:
  void WorkerThreadMain();
  void DecoderThreadMain();
  static uint64_t MMIOReadRegisterThunk(AudioSystem* as, uint32_t addr) {
    return as->ReadRegister(addr);
@ -154,6 +162,10 @@ class AudioSystem {
  std::atomic<bool> worker_running_;
  kernel::XHostThread* worker_thread_;
  std::atomic<bool> decoder_running_;
  std::thread decoder_thread_;
  xe::threading::Fence decoder_fence_;
  std::mutex lock_;
  // Stored little endian, accessed through 0x7FEA....
@ -176,7 +188,17 @@ class AudioSystem {
    } registers_;
    uint32_t register_file_[0xFFFF / 4];
  };
  // Host-side bookkeeping for one guest XMA context slot. Members carry
  // default initializers so a slot has a defined state before it is filled in.
  struct XMAContext {
    uint32_t    guest_ptr = 0;        // Guest address of this context's data.
    std::mutex  lock;                 // Guards the slot against concurrent use.
    bool        in_use = false;       // True while the slot is allocated.

    AudioDecoder* decoder = nullptr;  // Per-context decoder instance.
  };
  XMAContext xma_context_array_[320];
  std::vector<uint32_t> xma_context_free_list_;
  std::vector<uint32_t> xma_context_used_list_; // XMA contexts in use
  static const size_t maximum_client_count_ = 8;