[APU] Added new XMA decoder as config option.

2024-03-25 21:59:49 +01:00 · 2024-03-25 21:59:49 +01:00 · 122f58c9dd
parent 26ea81624a
commit 122f58c9dd
9 changed files with 1976 additions and 996 deletions
--- a/src/xenia/apu/xma_context.cc
+++ b/src/xenia/apu/xma_context.cc
@ -13,7 +13,6 @@
 #include <cstring>

 #include "xenia/apu/xma_decoder.h"
-#include "xenia/apu/xma_helpers.h"
 #include "xenia/base/bit_stream.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/platform.h"
@ -39,259 +38,11 @@ namespace apu {

 XmaContext::XmaContext() = default;

-XmaContext::~XmaContext() {
-  if (av_context_) {
-    if (avcodec_is_open(av_context_)) {
-      avcodec_close(av_context_);
-    }
-    av_free(av_context_);
-  }
-  if (av_frame_) {
-    av_frame_free(&av_frame_);
-  }
-  // if (current_frame_) {
-  //   delete[] current_frame_;
-  //  }
-}
+// The destructor has no work of its own, so it is defaulted in the same way
+// as the constructor above.
+XmaContext::~XmaContext() = default;

-int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
-  id_ = id;
-  memory_ = memory;
-  guest_ptr_ = guest_ptr;
-
-  // Allocate ffmpeg stuff:
-  av_packet_ = av_packet_alloc();
-  assert_not_null(av_packet_);
-  // chrispy: preallocate this buffer so that ffmpeg isn't reallocating it for
-  // every packet, these allocations were causing RtlSubsegmentInitialize
-  av_packet_->buf = av_buffer_alloc(128 * 1024);
-  // find the XMA2 audio decoder
-  av_codec_ = avcodec_find_decoder(AV_CODEC_ID_XMAFRAMES);
-  if (!av_codec_) {
-    XELOGE("XmaContext {}: Codec not found", id);
-    return 1;
-  }
-
-  av_context_ = avcodec_alloc_context3(av_codec_);
-  if (!av_context_) {
-    XELOGE("XmaContext {}: Couldn't allocate context", id);
-    return 1;
-  }
-
-  // Initialize these to 0. They'll actually be set later.
-  av_context_->channels = 0;
-  av_context_->sample_rate = 0;
-
-  av_frame_ = av_frame_alloc();
-  if (!av_frame_) {
-    XELOGE("XmaContext {}: Couldn't allocate frame", id);
-    return 1;
-  }
-
-  // FYI: We're purposely not opening the codec here. That is done later.
-  return 0;
-}
-
-bool XmaContext::Work() {
-  if (!is_enabled() || !is_allocated()) {
-    return false;
-  }
-  {
-    std::lock_guard<xe_mutex> lock(lock_);
-    set_is_enabled(false);
-
-    auto context_ptr = memory()->TranslateVirtual(guest_ptr());
-    XMA_CONTEXT_DATA data(context_ptr);
-    Decode(&data);
-    data.Store(context_ptr);
-    return true;
-  }
-}
-
-void XmaContext::Enable() {
-  std::lock_guard<xe_mutex> lock(lock_);
-
-  auto context_ptr = memory()->TranslateVirtual(guest_ptr());
-  XMA_CONTEXT_DATA data(context_ptr);
-
-  XELOGAPU("XmaContext: kicking context {} (buffer {} {}/{} bits)", id(),
-           data.current_buffer, data.input_buffer_read_offset,
-           (data.current_buffer == 0 ? data.input_buffer_0_packet_count
-                                     : data.input_buffer_1_packet_count) *
-               kBitsPerPacket);
-
-  data.Store(context_ptr);
-
-  set_is_enabled(true);
-}
-
-bool XmaContext::Block(bool poll) {
-  if (!lock_.try_lock()) {
-    if (poll) {
-      return false;
-    }
-    lock_.lock();
-  }
-  lock_.unlock();
-  return true;
-}
-
-void XmaContext::Clear() {
-  std::lock_guard<xe_mutex> lock(lock_);
-  XELOGAPU("XmaContext: reset context {}", id());
-
-  auto context_ptr = memory()->TranslateVirtual(guest_ptr());
-  XMA_CONTEXT_DATA data(context_ptr);
-
-  data.input_buffer_0_valid = 0;
-  data.input_buffer_1_valid = 0;
-  data.output_buffer_valid = 0;
-
-  data.input_buffer_read_offset = 0;
-  data.output_buffer_read_offset = 0;
-  data.output_buffer_write_offset = 0;
-
-  xma_frame_.fill(0);
-  split_frame_len_ = 0;
-  split_frame_len_partial_ = 0;
-  split_frame_padding_start_ = 0;
-
-  data.Store(context_ptr);
-}
-
-void XmaContext::Disable() {
-  std::lock_guard<xe_mutex> lock(lock_);
-  XELOGAPU("XmaContext: disabling context {}", id());
-  set_is_enabled(false);
-}
-
-void XmaContext::Release() {
-  // Lock it in case the decoder thread is working on it now.
-  std::lock_guard<xe_mutex> lock(lock_);
-  assert_true(is_allocated_ == true);
-
-  set_is_allocated(false);
-  auto context_ptr = memory()->TranslateVirtual(guest_ptr());
-  std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA));  // Zero it.
-}
-
-void XmaContext::SwapInputBuffer(XMA_CONTEXT_DATA* data) {
-  // No more frames.
-  if (data->current_buffer == 0) {
-    data->input_buffer_0_valid = 0;
-  } else {
-    data->input_buffer_1_valid = 0;
-  }
-  data->current_buffer ^= 1;
-  data->input_buffer_read_offset = kBitsPerHeader;
-}
-
-bool XmaContext::TrySetupNextLoop(XMA_CONTEXT_DATA* data,
-                                  bool ignore_input_buffer_offset) {
-  // Setup the input buffer offset if next loop exists.
-  // TODO(Pseudo-Kernel): Need to handle loop in the following cases.
-  // 1. loop_start == loop_end == 0
-  // 2. loop_start > loop_end && loop_count > 0
-  if (data->loop_count > 0 && data->loop_start < data->loop_end &&
-      (ignore_input_buffer_offset ||
-       data->input_buffer_read_offset >= data->loop_end)) {
-    // Loop back to the beginning.
-    data->input_buffer_read_offset = data->loop_start;
-    if (data->loop_count < 255) {
-      data->loop_count--;
-    }
-    return true;
-  }
-  return false;
-}
-
-/*
-void XmaContext::NextPacket(
-    uint8_t* input_buffer,
-    uint32_t input_size,
-    uint32_t input_buffer_read_offset) {
-*/
-void XmaContext::NextPacket(XMA_CONTEXT_DATA* data) {
-  // auto packet_idx = GetFramePacketNumber(input_buffer, input_size,
-  // input_buffer_read_offset);
-
-  // packet_idx++;
-  // if (packet_idx++ >= input_size)
-}
-
-int XmaContext::GetSampleRate(int id) {
-  switch (id) {
-    case 0:
-      return 24000;
-    case 1:
-      return 32000;
-    case 2:
-      return 44100;
-    case 3:
-      return 48000;
-  }
-  assert_always();
-  return 0;
-}
-
-bool XmaContext::ValidFrameOffset(uint8_t* block, size_t size_bytes,
-                                  size_t frame_offset_bits) {
-  uint32_t packet_num =
-      GetFramePacketNumber(block, size_bytes, frame_offset_bits);
-  if (packet_num == -1) {
-    // Invalid packet number
-    XELOGAPU("ValidFrameOffset: Invalid packet number");
-    return false;
-  }
-
-  uint8_t* packet = block + (packet_num * kBytesPerPacket);
-  size_t relative_offset_bits = frame_offset_bits % kBitsPerPacket;
-
-  uint32_t first_frame_offset = xma::GetPacketFrameOffset(packet);
-  if (first_frame_offset == -1 || first_frame_offset > kBitsPerPacket) {
-    XELOGAPU("ValidFrameOffset: Invalid frame offset {}", first_frame_offset);
-    // Packet only contains a partial frame, so no frames can start here.
-    return false;
-  }
-
-  BitStream stream(packet, kBitsPerPacket);
-  stream.SetOffset(first_frame_offset);
-  while (true) {
-    if (stream.offset_bits() == relative_offset_bits) {
-      return true;
-    }
-
-    if (stream.BitsRemaining() < 15) {
-      XELOGAPU("ValidFrameOffset: No room for next frame header {}",
-               first_frame_offset);
-      // Not enough room for another frame header.
-      return false;
-    }
-
-    uint64_t size = stream.Read(15);
-    if ((size - 15) > stream.BitsRemaining()) {
-      XELOGAPU("ValidFrameOffset: Last frame {} - {}", first_frame_offset,
-               size);
-      // Last frame.
-      return false;
-    } else if (size == 0x7FFF) {
-      // Invalid frame (and last of this packet)
-      return false;
-    }
-
-    stream.Advance(size - 16);
-
-    // Read the trailing bit to see if frames follow
-    if (stream.Read(1) == 0) {
-      break;
-    }
-  }
-
-  return false;
-}
-
-static void dump_raw(AVFrame* frame, int id) {
-  FILE* outfile = fopen(fmt::format("out{}.raw", id).c_str(), "ab");
+void XmaContext::DumpRaw(AVFrame* frame, int id) {
+  FILE* outfile =
+      xe::filesystem::OpenFile(fmt::format("out{}.raw", id).c_str(), "ab");
  if (!outfile) {
    return;
  }
@ -304,661 +55,6 @@ static void dump_raw(AVFrame* frame, int id) {
  fclose(outfile);
 }

-void XmaContext::Decode(XMA_CONTEXT_DATA* data) {
-  SCOPE_profile_cpu_f("apu");
-
-  // What I see:
-  // XMA outputs 2 bytes per sample
-  // 512 samples per frame (128 per subframe)
-  // Max output size is data.output_buffer_block_count * 256
-
-  // This decoder is fed packets (max 4095 per buffer)
-  // Packets contain "some" frames
-  // 32bit header (big endian)
-
-  // Frames are the smallest thing the SPUs can decode.
-  // They can and usually will span packets.
-
-  // Sample rates (data.sample_rate):
-  // 0 - 24 kHz
-  // 1 - 32 kHz
-  // 2 - 44.1 kHz
-  // 3 - 48 kHz
-
-  // SPUs also support stereo decoding. (data.is_stereo)
-
-  // Check the output buffer - we cannot decode anything else if it's
-  // unavailable.
-  if (!data->output_buffer_valid) {
-    return;
-  }
-
-  // No available data.
-  if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
-    return;
-  }
-
-  // XAudio Loops
-  // loop_count:
-  //  - XAUDIO2_MAX_LOOP_COUNT = 254
-  //  - XAUDIO2_LOOP_INFINITE = 255
-  // loop_start/loop_end are bit offsets to a specific frame
-
-  // Translate pointers for future use.
-  // Sometimes the game will use rolling input buffers. If they do, we cannot
-  // assume they form a complete block! In addition, the buffers DO NOT have
-  // to be contiguous!
-  uint8_t* in0 = data->input_buffer_0_valid
-                     ? memory()->TranslatePhysical(data->input_buffer_0_ptr)
-                     : nullptr;
-  uint8_t* in1 = data->input_buffer_1_valid
-                     ? memory()->TranslatePhysical(data->input_buffer_1_ptr)
-                     : nullptr;
-  uint8_t* current_input_buffer = data->current_buffer ? in1 : in0;
-
-  if (!current_input_buffer) {
-    XELOGE("XmaContext {}: Error - input buffer pointer is invalid!", id());
-    return;
-  }
-
-  if (!data->output_buffer_block_count) {
-    XELOGE("XmaContext {}: Error - Received 0 for output_buffer_block_count!",
-           id());
-    return;
-  }
-
-  XELOGAPU(
-      "Processing context {} (offset {}, buffer {}, ptr {:p}, output buffer "
-      "{:08X}, output buffer count {})",
-      id(), data->input_buffer_read_offset, data->current_buffer,
-      current_input_buffer, data->output_buffer_ptr,
-      data->output_buffer_block_count);
-
-  if (is_stream_done_) {
-    is_stream_done_ = false;
-    packets_skip_ = 0;
-    SwapInputBuffer(data);
-    return;
-  }
-
-  size_t input_buffer_0_size =
-      data->input_buffer_0_packet_count * kBytesPerPacket;
-  size_t input_buffer_1_size =
-      data->input_buffer_1_packet_count * kBytesPerPacket;
-
-  size_t current_input_size =
-      data->current_buffer ? input_buffer_1_size : input_buffer_0_size;
-  size_t current_input_packet_count = current_input_size / kBytesPerPacket;
-  bool is_streaming = data->input_buffer_0_packet_count == 1 &&
-                      data->input_buffer_1_packet_count == 1;
-
-  // Output buffers are in raw PCM samples, 256 bytes per block.
-  // Output buffer is a ring buffer. We need to write from the write offset
-  // to the read offset.
-  uint8_t* output_buffer = memory()->TranslatePhysical(data->output_buffer_ptr);
-  uint32_t output_capacity =
-      data->output_buffer_block_count * kBytesPerSubframeChannel;
-  uint32_t output_read_offset =
-      data->output_buffer_read_offset * kBytesPerSubframeChannel;
-  uint32_t output_write_offset =
-      data->output_buffer_write_offset * kBytesPerSubframeChannel;
-
-  RingBuffer output_rb(output_buffer, output_capacity);
-  output_rb.set_read_offset(output_read_offset);
-  output_rb.set_write_offset(output_write_offset);
-
-  // We can only decode an entire frame and write it out at a time, so
-  // don't save any samples.
-  // TODO(JoelLinn): subframes when looping
-  size_t output_remaining_bytes = output_rb.write_count();
-  output_remaining_bytes -=
-      output_remaining_bytes % (kBytesPerFrameChannel << data->is_stereo);
-
-  // is_dirty_ = true; // TODO
-  // is_dirty_ = false;  // TODO
-  assert_false(data->stop_when_done);
-  assert_false(data->interrupt_when_done);
-  static int total_samples = 0;
-  // Decode until we can't write any more data.
-  while (output_remaining_bytes > 0) {
-    if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
-      // Out of data.
-      break;
-    }
-    // Setup the input buffer if we are at loop_end.
-    // The input buffer must not be swapped out until all loops are processed.
-    bool reuse_input_buffer = TrySetupNextLoop(data, false);
-
-    // assert_true(packets_skip_ == 0);
-    // assert_true(split_frame_len_ == 0);
-    // assert_true(split_frame_len_partial_ == 0);
-
-    // Where are we in the buffer (in XMA jargon)
-    int packet_idx, frame_idx, frame_count;
-    uint8_t* packet;
-    bool frame_last_split;
-
-    BitStream stream(current_input_buffer, current_input_size * 8);
-    stream.SetOffset(data->input_buffer_read_offset);
-
-    if (data->input_buffer_read_offset > current_input_size * 8) {
-      XELOGE(
-          "XmaContext {}: Error - Provided input offset exceed input buffer "
-          "size! ({} > {})",
-          id(), data->input_buffer_read_offset, current_input_size * 8);
-      SwapInputBuffer(data);
-      return;
-    }
-    // if we had a buffer swap try to skip packets first
-    if (packets_skip_ > 0) {
-      packet_idx =
-          GetFramePacketNumber(current_input_buffer, current_input_size,
-                               data->input_buffer_read_offset);
-      while (packets_skip_ > 0) {
-        packets_skip_--;
-        packet_idx++;
-        if (packet_idx > current_input_packet_count) {
-          if (!reuse_input_buffer) {
-            // Last packet. Try setup once more.
-            reuse_input_buffer = TrySetupNextLoop(data, true);
-          }
-          if (!reuse_input_buffer) {
-            if (is_streaming) {
-              SwapInputBuffer(data);
-            } else {
-              is_stream_done_ = true;
-            }
-          }
-          return;
-        }
-      }
-      // invalid frame pointer but needed for us
-      data->input_buffer_read_offset = packet_idx * kBitsPerPacket;
-      // continue;
-    }
-
-    if (split_frame_len_) {
-      // handle a frame that was split over two packages
-      packet_idx =
-          GetFramePacketNumber(current_input_buffer, current_input_size,
-                               data->input_buffer_read_offset);
-      packet = current_input_buffer + packet_idx * kBytesPerPacket;
-      std::tie(frame_count, frame_last_split) = GetPacketFrameCount(packet);
-      frame_idx = -1;
-
-      stream =
-          BitStream(current_input_buffer, (packet_idx + 1) * kBitsPerPacket);
-      stream.SetOffset(packet_idx * kBitsPerPacket + kBitsPerHeader);
-
-      if (split_frame_len_ > xma::kMaxFrameLength) {
-        // TODO write CopyPeekMethod
-        auto offset = stream.offset_bits();
-        stream.Copy(
-            xma_frame_.data() + 1 +
-                ((split_frame_len_partial_ + split_frame_padding_start_) / 8),
-            15 - split_frame_len_partial_);
-        stream.SetOffset(offset);
-        BitStream slen(xma_frame_.data() + 1, 15 + split_frame_padding_start_);
-        slen.Advance(split_frame_padding_start_);
-        split_frame_len_ = static_cast<int>(slen.Read(15));
-      }
-
-      if (frame_count > 0) {
-        // assert_true(xma::GetPacketFrameOffset(packet) - 32 ==
-        //             split_frame_len_ - split_frame_len_partial_);
-      }
-
-      auto offset = stream.Copy(
-          xma_frame_.data() + 1 +
-              ((split_frame_len_partial_ + split_frame_padding_start_) / 8),
-          split_frame_len_ - split_frame_len_partial_);
-      assert_true(offset ==
-                  (split_frame_padding_start_ + split_frame_len_partial_) % 8);
-    } else {
-      if (data->input_buffer_read_offset % kBitsPerPacket == 0) {
-        // Invalid offset. Go ahead and set it.
-        int packet_number =
-            GetFramePacketNumber(current_input_buffer, current_input_size,
-                                 data->input_buffer_read_offset);
-
-        if (packet_number == -1) {
-          return;
-        }
-
-        auto offset =
-            xma::GetPacketFrameOffset(current_input_buffer +
-                                      kBytesPerPacket * packet_number) +
-            data->input_buffer_read_offset;
-        if (offset == -1) {
-          // No more frames.
-          SwapInputBuffer(data);
-          // TODO partial frames? end?
-          XELOGE("XmaContext {}: TODO partial frames? end?", id());
-          assert_always("TODO");
-          return;
-        } else {
-          data->input_buffer_read_offset = offset;
-        }
-      }
-
-      if (!ValidFrameOffset(current_input_buffer, current_input_size,
-                            data->input_buffer_read_offset)) {
-        XELOGAPU("XmaContext {}: Error - Invalid read offset {}!", id(),
-                 data->input_buffer_read_offset);
-        SwapInputBuffer(data);
-        return;
-      }
-
-      // Where are we in the buffer (in XMA jargon)
-      std::tie(packet_idx, frame_idx) =
-          GetFrameNumber(current_input_buffer, current_input_size,
-                         data->input_buffer_read_offset);
-      // TODO handle
-      assert_true(packet_idx >= 0);
-      assert_true(frame_idx >= 0);
-      packet = current_input_buffer + packet_idx * kBytesPerPacket;
-      // frames that belong to this packet
-      std::tie(frame_count, frame_last_split) = GetPacketFrameCount(packet);
-      assert_true(frame_count >= 0);  // TODO end
-
-      PrepareDecoder(packet, data->sample_rate, bool(data->is_stereo));
-
-      // Current frame is split to next packet:
-      bool frame_is_split = frame_last_split && (frame_idx >= frame_count - 1);
-
-      stream =
-          BitStream(current_input_buffer, (packet_idx + 1) * kBitsPerPacket);
-      stream.SetOffset(data->input_buffer_read_offset);
-      // int frame_len;
-      // int frame_len_partial
-      split_frame_len_partial_ = static_cast<int>(stream.BitsRemaining());
-      if (split_frame_len_partial_ >= 15) {
-        split_frame_len_ = static_cast<int>(stream.Peek(15));
-      } else {
-        // assert_always();
-        split_frame_len_ = xma::kMaxFrameLength + 1;
-      }
-      assert_true(frame_is_split ==
-                  (split_frame_len_ > split_frame_len_partial_));
-
-      // TODO fix bitstream copy
-      std::memset(xma_frame_.data(), 0, xma_frame_.size());
-
-      {
-        int32_t bits_to_copy =
-            std::min(split_frame_len_, split_frame_len_partial_);
-
-        if (!stream.IsOffsetValid(bits_to_copy)) {
-          XELOGAPU(
-              "XmaContext {}: Error - Invalid amount of bits to copy! "
-              "split_frame_len: {}, split_partial: {}, offset_bits: {}",
-              id(), split_frame_len_, split_frame_len_partial_,
-              stream.offset_bits());
-          SwapInputBuffer(data);
-          return;
-        }
-        auto offset = stream.Copy(xma_frame_.data() + 1, bits_to_copy);
-        assert_true(offset < 8);
-        split_frame_padding_start_ = static_cast<uint8_t>(offset);
-      }
-
-      if (frame_is_split) {
-        // go to next xma packet of this stream
-        packets_skip_ = xma::GetPacketSkipCount(packet) + 1;
-        while (packets_skip_ > 0) {
-          packets_skip_--;
-          packet += kBytesPerPacket;
-          packet_idx++;
-          if (packet_idx >= current_input_packet_count) {
-            if (!reuse_input_buffer) {
-              // Last packet. Try setup once more.
-              reuse_input_buffer = TrySetupNextLoop(data, true);
-            }
-            if (!reuse_input_buffer) {
-              if (is_streaming) {
-                SwapInputBuffer(data);
-              } else {
-                is_stream_done_ = true;
-              }
-            }
-            return;
-          }
-        }
-        // TODO guest might read this:
-        data->input_buffer_read_offset = packet_idx * kBitsPerPacket;
-        continue;
-      }
-    }
-
-    av_packet_->data = xma_frame_.data();
-    av_packet_->size = static_cast<int>(
-        1 + ((split_frame_padding_start_ + split_frame_len_) / 8) +
-        (((split_frame_padding_start_ + split_frame_len_) % 8) ? 1 : 0));
-
-    auto padding_end = av_packet_->size * 8 -
-                       (8 + split_frame_padding_start_ + split_frame_len_);
-    assert_true(padding_end < 8);
-    xma_frame_[0] =
-        ((split_frame_padding_start_ & 7) << 5) | ((padding_end & 7) << 2);
-
-    split_frame_len_ = 0;
-    split_frame_len_partial_ = 0;
-    split_frame_padding_start_ = 0;
-
-    auto ret = avcodec_send_packet(av_context_, av_packet_);
-    if (ret < 0) {
-      XELOGE("XmaContext {}: Error - Sending packet for decoding failed", id());
-      // TODO bail out
-      assert_always();
-    }
-    ret = avcodec_receive_frame(av_context_, av_frame_);
-    /*
-    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
-      // TODO AVERROR_EOF???
-      break;
-    else
-    */
-    if (ret < 0) {
-      XELOGE("XmaContext {}: Error - Decoding failed", id());
-      data->parser_error_status = 4;  // TODO(Gliniak): Find all parsing errors
-                                      // and create enumerator from them
-      SwapInputBuffer(data);
-      assert_always();
-      return;  // TODO bail out
-    }
-    assert_true(ret == 0);
-
-    {
-      // copy over 1 frame
-      // update input buffer read offset
-
-      // assert(decoded_consumed_samples_ + kSamplesPerFrame <=
-      //       current_frame_.size());
-      assert_true(av_context_->sample_fmt == AV_SAMPLE_FMT_FLTP);
-      // assert_true(frame_is_split == (frame_idx == -1));
-
-      //			dump_raw(av_frame_, id());
-      ConvertFrame((const uint8_t**)av_frame_->data,
-                   bool(av_frame_->channels > 1), raw_frame_.data());
-      // decoded_consumed_samples_ += kSamplesPerFrame;
-
-      auto byte_count = kBytesPerFrameChannel << data->is_stereo;
-      assert_true(output_remaining_bytes >= byte_count);
-      output_rb.Write(raw_frame_.data(), byte_count);
-      output_remaining_bytes -= byte_count;
-      data->output_buffer_write_offset = output_rb.write_offset() / 256;
-
-      total_samples += id_ == 0 ? kSamplesPerFrame : 0;
-
-      uint32_t offset =
-          std::max(kBitsPerHeader, data->input_buffer_read_offset);
-      offset = static_cast<uint32_t>(
-          GetNextFrame(current_input_buffer, current_input_size, offset));
-
-      XELOGAPU(
-          "XmaContext {}: Next Offset: {} (Frame: {}/{} Packet: {}/{} Packet "
-          "Skip: {} - {})",
-          id(), offset, frame_idx, frame_count - 1, packet_idx,
-          current_input_packet_count, xma::GetPacketSkipCount(packet),
-          data->input_buffer_read_offset);
-      if (frame_idx + 1 >= frame_count) {
-        // Skip to next packet (no split frame)
-        packets_skip_ = xma::GetPacketSkipCount(packet) + 1;
-        while (packets_skip_ > 0) {
-          packets_skip_--;
-          packet_idx++;
-          if (packet_idx >= current_input_packet_count) {
-            if (!reuse_input_buffer) {
-              // Last packet. Try setup once more.
-              reuse_input_buffer = TrySetupNextLoop(data, true);
-            }
-            if (!reuse_input_buffer) {
-              if (is_streaming) {
-                SwapInputBuffer(data);
-                data->input_buffer_read_offset =
-                    GetPacketFirstFrameOffset(data);
-              } else {
-                is_stream_done_ = true;
-              }
-              if (output_rb.write_offset() == output_rb.read_offset()) {
-                data->output_buffer_valid = 0;
-              }
-            }
-            return;
-          }
-        }
-        packet = current_input_buffer + packet_idx * kBytesPerPacket;
-        // TODO(Gliniak): There might be an edge-case when we're in packet 26/27
-        // and GetPacketFrameOffset returns that there is no data in this packet
-        // aka. FrameOffset is set to more than 0x7FFF-0x20
-        offset =
-            xma::GetPacketFrameOffset(packet) + packet_idx * kBitsPerPacket;
-      }
-      if (offset == 0 || frame_idx == -1) {
-        // Next packet but we already skipped to it
-        if (packet_idx >= current_input_packet_count) {
-          // Buffer is fully used
-          if (!reuse_input_buffer) {
-            // Last packet. Try setup once more.
-            reuse_input_buffer = TrySetupNextLoop(data, true);
-          }
-          if (!reuse_input_buffer) {
-            if (is_streaming) {
-              SwapInputBuffer(data);
-            } else {
-              is_stream_done_ = true;
-            }
-          }
-          break;
-        }
-        offset =
-            xma::GetPacketFrameOffset(packet) + packet_idx * kBitsPerPacket;
-      }
-      // TODO buffer bounds check
-      assert_true(data->input_buffer_read_offset < offset);
-      data->input_buffer_read_offset = offset;
-    }
-  }
-
-  // assert_true((split_frame_len_ != 0) == (data->input_buffer_read_offset ==
-  // 0));
-
-  // The game will kick us again with a new output buffer later.
-  // It's important that we only invalidate this if we actually wrote to it!!
-  if (output_rb.write_offset() == output_rb.read_offset()) {
-    data->output_buffer_valid = 0;
-  }
-}
-
-uint32_t XmaContext::GetPacketFirstFrameOffset(const XMA_CONTEXT_DATA* data) {
-  uint32_t first_frame_offset = kBitsPerHeader;
-
-  uint8_t* in0 = data->input_buffer_0_valid
-                     ? memory()->TranslatePhysical(data->input_buffer_0_ptr)
-                     : nullptr;
-  uint8_t* in1 = data->input_buffer_1_valid
-                     ? memory()->TranslatePhysical(data->input_buffer_1_ptr)
-                     : nullptr;
-  uint8_t* current_input_buffer = data->current_buffer ? in1 : in0;
-
-  if (current_input_buffer) {
-    first_frame_offset = xma::GetPacketFrameOffset(current_input_buffer);
-  }
-  return first_frame_offset;
-}
-
-size_t XmaContext::GetNextFrame(uint8_t* block, size_t size,
-                                size_t bit_offset) {
-  // offset = xma::GetPacketFrameOffset(packet);
-  // TODO meh
-  // auto next_packet = bit_offset - bit_offset % kBitsPerPacket +
-  // kBitsPerPacket;
-  auto packet_idx = GetFramePacketNumber(block, size, bit_offset);
-
-  BitStream stream(block, size * 8);
-  stream.SetOffset(bit_offset);
-
-  if (stream.BitsRemaining() < 15) {
-    return 0;
-  }
-
-  uint64_t len = stream.Read(15);
-  if ((len - 15) > stream.BitsRemaining()) {
-    // assert_always("TODO");
-    // *bit_offset = next_packet;
-    // return false;
-    // return next_packet;
-    return 0;
-  } else if (len >= xma::kMaxFrameLength) {
-    assert_always("TODO");
-    // *bit_offset = next_packet;
-    // return false;
-    return 0;
-    // return next_packet;
-  }
-
-  stream.Advance(len - (15 + 1));
-  // Read the trailing bit to see if frames follow
-  if (stream.Read(1) == 0) {
-    return 0;
-  }
-
-  bit_offset += len;
-  if (packet_idx < GetFramePacketNumber(block, size, bit_offset)) {
-    return 0;
-  }
-  return bit_offset;
-}
-
-int XmaContext::GetFramePacketNumber(uint8_t* block, size_t size,
-                                     size_t bit_offset) {
-  size *= 8;
-  if (bit_offset >= size) {
-    // Not good :(
-    assert_always();
-    return -1;
-  }
-
-  size_t byte_offset = bit_offset >> 3;
-  size_t packet_number = byte_offset / kBytesPerPacket;
-
-  return (uint32_t)packet_number;
-}
-
-std::tuple<int, int> XmaContext::GetFrameNumber(uint8_t* block, size_t size,
-                                                size_t bit_offset) {
-  auto packet_idx = GetFramePacketNumber(block, size, bit_offset);
-
-  if (packet_idx < 0 || (packet_idx + 1) * kBytesPerPacket > size) {
-    assert_always();
-    return {packet_idx, -2};
-  }
-
-  if (bit_offset == 0) {
-    return {packet_idx, -1};
-  }
-
-  uint8_t* packet = block + (packet_idx * kBytesPerPacket);
-  auto first_frame_offset = xma::GetPacketFrameOffset(packet);
-  BitStream stream(block, size * 8);
-  stream.SetOffset(packet_idx * kBitsPerPacket + first_frame_offset);
-
-  int frame_idx = 0;
-  while (true) {
-    if (stream.BitsRemaining() < 15) {
-      break;
-    }
-
-    if (stream.offset_bits() == bit_offset) {
-      break;
-    }
-
-    uint64_t size = stream.Read(15);
-    if ((size - 15) > stream.BitsRemaining()) {
-      // Last frame.
-      break;
-    } else if (size == 0x7FFF) {
-      // Invalid frame (and last of this packet)
-      break;
-    }
-
-    stream.Advance(size - (15 + 1));
-
-    // Read the trailing bit to see if frames follow
-    if (stream.Read(1) == 0) {
-      break;
-    }
-    frame_idx++;
-  }
-  return {packet_idx, frame_idx};
-}
-
-std::tuple<int, bool> XmaContext::GetPacketFrameCount(uint8_t* packet) {
-  auto first_frame_offset = xma::GetPacketFrameOffset(packet);
-  if (first_frame_offset > kBitsPerPacket - kBitsPerHeader) {
-    // frame offset is beyond packet end
-    return {0, false};
-  }
-
-  BitStream stream(packet, kBitsPerPacket);
-  stream.SetOffset(first_frame_offset);
-  int frame_count = 0;
-
-  while (true) {
-    if (stream.BitsRemaining() < 15) {
-      return {frame_count, false};
-    }
-
-    frame_count++;
-    uint64_t size = stream.Read(15);
-    if ((size - 15) > stream.BitsRemaining()) {
-      return {frame_count, true};
-    } else if (size == 0x7FFF) {
-      assert_always();
-      return {frame_count, true};
-    }
-
-    stream.Advance(size - (15 + 1));
-
-    if (stream.Read(1) == 0) {
-      return {frame_count, false};
-    }
-    // There is a case when frame ends EXACTLY at the end of packet.
-    // In such case we shouldn't increase frame count by additional not existing
-    // frame and don't mark it as splitted, but as a normal frame
-    if (!stream.BitsRemaining()) {
-      return {frame_count, false};
-    }
-  }
-}
-
-int XmaContext::PrepareDecoder(uint8_t* packet, int sample_rate,
-                               bool is_two_channel) {
-  // Sanity check: Packet metadata is always 1 for XMA2/0 for XMA
-  assert_true((packet[2] & 0x7) == 1 || (packet[2] & 0x7) == 0);
-
-  sample_rate = GetSampleRate(sample_rate);
-
-  // Re-initialize the context with new sample rate and channels.
-  uint32_t channels = is_two_channel ? 2 : 1;
-  if (av_context_->sample_rate != sample_rate ||
-      av_context_->channels != channels) {
-    // We have to reopen the codec so it'll realloc whatever data it needs.
-    // TODO(DrChat): Find a better way.
-    avcodec_close(av_context_);
-
-    av_context_->sample_rate = sample_rate;
-    av_context_->channels = channels;
-
-    if (avcodec_open2(av_context_, av_codec_, NULL) < 0) {
-      XELOGE("XmaContext: Failed to reopen FFmpeg context");
-      return -1;
-    }
-    return 1;
-  }
-  return 0;
-}
-
 void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel,
                              uint8_t* output_buffer) {
  // Loop through every sample, convert and drop it into the output array.
--- a/src/xenia/apu/xma_context.h
+++ b/src/xenia/apu/xma_context.h
@ -76,7 +76,7 @@ struct XMA_CONTEXT_DATA {

  // DWORD 2
  uint32_t input_buffer_read_offset : 26;  // XMAGetInputBufferReadOffset
-  uint32_t unk_dword_2 : 6;                // ErrorStatus/ErrorSet (?)
+  uint32_t error_status : 6;               // ErrorStatus/ErrorSet (?)

  // DWORD 3
  uint32_t loop_start : 26;          // XMASetLoopData LoopStartOffset
@ -119,6 +119,34 @@ struct XMA_CONTEXT_DATA {
                      reinterpret_cast<const uint32_t*>(this),
                      sizeof(XMA_CONTEXT_DATA) / 4);
  }
+
+  // Returns the valid flag of input buffer 0 when |buffer_index| is 0, and of
+  // input buffer 1 for any other index.
+  bool IsInputBufferValid(uint8_t buffer_index) const {
+    if (buffer_index == 0) {
+      return input_buffer_0_valid;
+    }
+    return input_buffer_1_valid;
+  }
+
+  // Returns the valid flag of the buffer selected by current_buffer.
+  bool IsCurrentInputBufferValid() const {
+    const bool current_is_valid = IsInputBufferValid(current_buffer);
+    return current_is_valid;
+  }
+
+  // True when at least one of the two input buffers is flagged valid.
+  bool IsAnyInputBufferValid() const {
+    if (input_buffer_0_valid) {
+      return true;
+    }
+    return input_buffer_1_valid;
+  }
+
+  // Returns input_buffer_0_ptr when |buffer_index| is 0, otherwise
+  // input_buffer_1_ptr.
+  const uint32_t GetInputBufferAddress(uint8_t buffer_index) const {
+    if (buffer_index == 0) {
+      return input_buffer_0_ptr;
+    }
+    return input_buffer_1_ptr;
+  }
+
+  // Returns the address field of the buffer selected by current_buffer.
+  const uint32_t GetCurrentInputBufferAddress() const {
+    const uint32_t current_address = GetInputBufferAddress(current_buffer);
+    return current_address;
+  }
+
+  // Returns input_buffer_0_packet_count when |buffer_index| is 0, otherwise
+  // input_buffer_1_packet_count.
+  const uint32_t GetInputBufferPacketCount(uint8_t buffer_index) const {
+    if (buffer_index == 0) {
+      return input_buffer_0_packet_count;
+    }
+    return input_buffer_1_packet_count;
+  }
+  // Returns the packet count of the buffer selected by current_buffer.
+  const uint32_t GetCurrentInputBufferPacketCount() const {
+    const uint32_t current_count = GetInputBufferPacketCount(current_buffer);
+    return current_count;
+  }
 };
 static_assert_size(XMA_CONTEXT_DATA, 64);

@ -150,14 +178,16 @@ class XmaContext {
  explicit XmaContext();
  ~XmaContext();

-  int Setup(uint32_t id, Memory* memory, uint32_t guest_ptr);
-  bool Work();
+  // Default implementation: performs no setup and returns 0. Derived contexts
+  // provide the real initialization.
+  virtual int Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
+    return 0;
+  }
+  // Default implementation: does no work and returns false.
+  virtual bool Work() {
+    return false;
+  }

-  void Enable();
-  bool Block(bool poll);
-  void Clear();
-  void Disable();
-  void Release();
+  // Default implementation: no-op.
+  virtual void Enable() {}
+  // Default implementation: never blocks and returns false.
+  virtual bool Block(bool poll) {
+    return false;
+  }
+  // Default implementation: no-op.
+  virtual void Clear() {}
+  // Default implementation: no-op.
+  virtual void Disable() {}
+  // Default implementation: no-op.
+  virtual void Release() {}

  Memory* memory() const { return memory_; }

@ -169,37 +199,12 @@ class XmaContext {
  void set_is_allocated(bool is_allocated) { is_allocated_ = is_allocated; }
  void set_is_enabled(bool is_enabled) { is_enabled_ = is_enabled; }

- private:
-  static void SwapInputBuffer(XMA_CONTEXT_DATA* data);
-  static bool TrySetupNextLoop(XMA_CONTEXT_DATA* data,
-                               bool ignore_input_buffer_offset);
-  static void NextPacket(XMA_CONTEXT_DATA* data);
-  static int GetSampleRate(int id);
-  // Get the offset of the next frame. Does not traverse packets.
-  static size_t GetNextFrame(uint8_t* block, size_t size, size_t bit_offset);
-  // Get the containing packet number of the frame pointed to by the offset.
-  static int GetFramePacketNumber(uint8_t* block, size_t size,
-                                  size_t bit_offset);
-  // Get the packet number and the index of the frame inside that packet
-  static std::tuple<int, int> GetFrameNumber(uint8_t* block, size_t size,
-                                             size_t bit_offset);
-  // Get the number of frames contained in the packet (including truncated) and
-  // if the last frame is split.
-  static std::tuple<int, bool> GetPacketFrameCount(uint8_t* packet);
-
+ protected:
+  static void DumpRaw(AVFrame* frame, int id);
  // Convert sample format and swap bytes
  static void ConvertFrame(const uint8_t** samples, bool is_two_channel,
                           uint8_t* output_buffer);

-  bool ValidFrameOffset(uint8_t* block, size_t size_bytes,
-                        size_t frame_offset_bits);
-  void Decode(XMA_CONTEXT_DATA* data);
-  int PrepareDecoder(uint8_t* packet, int sample_rate, bool is_two_channel);
-
-  // This method should be used ONLY when we're at the last packet of the stream
-  // and we want to find offset in next buffer
-  uint32_t GetPacketFirstFrameOffset(const XMA_CONTEXT_DATA* data);
-
  Memory* memory_ = nullptr;

  uint32_t id_ = 0;
@ -207,36 +212,12 @@ class XmaContext {
  xe_mutex lock_;
  volatile bool is_allocated_ = false;
  volatile bool is_enabled_ = false;
-  // bool is_dirty_ = true;

  // ffmpeg structures
  AVPacket* av_packet_ = nullptr;
  AVCodec* av_codec_ = nullptr;
  AVCodecContext* av_context_ = nullptr;
  AVFrame* av_frame_ = nullptr;
-  // uint32_t decoded_consumed_samples_ = 0; // TODO do this dynamically
-  // int decoded_idx_ = -1;
-
-  // bool partial_frame_saved_ = false;
-  // bool partial_frame_size_known_ = false;
-  // size_t partial_frame_total_size_bits_ = 0;
-  // size_t partial_frame_start_offset_bits_ = 0;
-  // size_t partial_frame_offset_bits_ = 0;  // blah internal don't use this
-  // std::vector<uint8_t> partial_frame_buffer_;
-  uint32_t packets_skip_ = 0;
-
-  bool is_stream_done_ = false;
-  // bool split_frame_pending_ = false;
-  uint32_t split_frame_len_ = 0;
-  uint32_t split_frame_len_partial_ = 0;
-  uint8_t split_frame_padding_start_ = 0;
-  // first byte contains bit offset information
-  std::array<uint8_t, 1 + 4096> xma_frame_;
-
-  // uint8_t* current_frame_ = nullptr;
-  // conversion buffer for 2 channel frame
-  std::array<uint8_t, kBytesPerFrameChannel * 2> raw_frame_;
-  // std::vector<uint8_t> current_frame_ = std::vector<uint8_t>(0);
 };

 }  // namespace apu
--- a/src/xenia/apu/xma_context_new.cc
+++ b/src/xenia/apu/xma_context_new.cc
@ -0,0 +1,694 @@
+/**
+******************************************************************************
+* Xenia : Xbox 360 Emulator Research Project                                 *
+******************************************************************************
+* Copyright 2024 Xenia Canary. All rights reserved.                          *
+* Released under the BSD license - see LICENSE in the root for more details. *
+******************************************************************************
+*/
+
+#include "xenia/apu/xma_context_new.h"
+#include "xenia/apu/xma_helpers.h"
+
+#include <algorithm>
+
+#include "xenia/base/logging.h"
+#include "xenia/base/platform.h"
+#include "xenia/base/profiling.h"
+
+extern "C" {
+#if XE_COMPILER_MSVC
+#pragma warning(push)
+#pragma warning(disable : 4101 4244 5033)
+#endif
+#include "third_party/FFmpeg/libavcodec/avcodec.h"
+#if XE_COMPILER_MSVC
+#pragma warning(pop)
+#endif
+}  // extern "C"
+
+// Credits for most of this code goes to:
+// https://github.com/koolkdev/libertyv/blob/master/libav_wrapper/xma2dec.c
+
+namespace xe {
+namespace apu {
+
+XmaContextNew::XmaContextNew() = default;
+
+// Releases the FFmpeg objects allocated in Setup().
+XmaContextNew::~XmaContextNew() {
+  // avcodec_free_context() closes the codec if needed, frees the context and
+  // nulls the pointer; it does nothing when the context was never allocated.
+  avcodec_free_context(&av_context_);
+  // av_frame_free() is likewise safe to call on a null frame.
+  av_frame_free(&av_frame_);
+  // The packet and the buffer attached to it in Setup() were previously never
+  // released.
+  av_packet_free(&av_packet_);
+}
+
+// Binds this context to its guest-memory slot and allocates the FFmpeg
+// packet, codec context and frame used for decoding. The codec itself is not
+// opened here. Returns 0 on success, or 1 when the decoder cannot be found or
+// the codec context / frame cannot be allocated.
+int XmaContextNew::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
+  id_ = id;
+  memory_ = memory;
+  guest_ptr_ = guest_ptr;
+
+  // Packet used to hand frames to the decoder, with a 128 KiB buffer attached
+  // up front.
+  av_packet_ = av_packet_alloc();
+  assert_not_null(av_packet_);
+  av_packet_->buf = av_buffer_alloc(128 * 1024);
+
+  // Look up the XMA frames decoder.
+  av_codec_ = avcodec_find_decoder(AV_CODEC_ID_XMAFRAMES);
+  if (av_codec_ == nullptr) {
+    XELOGE("XmaContext {}: Codec not found", id);
+    return 1;
+  }
+
+  av_context_ = avcodec_alloc_context3(av_codec_);
+  if (av_context_ == nullptr) {
+    XELOGE("XmaContext {}: Couldn't allocate context", id);
+    return 1;
+  }
+
+  // Placeholders only; the real values are filled in by PrepareDecoder().
+  av_context_->sample_rate = 0;
+  av_context_->channels = 0;
+
+  av_frame_ = av_frame_alloc();
+  if (av_frame_ == nullptr) {
+    XELOGE("XmaContext {}: Couldn't allocate frame", id);
+    return 1;
+  }
+
+  // The codec is intentionally left unopened; PrepareDecoder() opens it.
+  return 0;
+}
+
+// Builds a RingBuffer over the guest output buffer described by |data| and
+// caches, in remaining_subframe_blocks_in_output_buffer_, how many 256-byte
+// blocks can still be written to it.
+RingBuffer XmaContextNew::PrepareOutputRingBuffer(XMA_CONTEXT_DATA* data) {
+  // The context stores capacity and offsets in blocks; convert them to bytes.
+  const uint32_t capacity_bytes =
+      data->output_buffer_block_count * kOutputBytesPerBlock;
+  const uint32_t read_offset_bytes =
+      data->output_buffer_read_offset * kOutputBytesPerBlock;
+  const uint32_t write_offset_bytes =
+      data->output_buffer_write_offset * kOutputBytesPerBlock;
+
+  if (capacity_bytes > kOutputMaxSizeBytes) {
+    XELOGW(
+        "XmaContext {}: Output buffer uses more space than expected! "
+        "(Actual: {} Max: {})",
+        id(), capacity_bytes, kOutputMaxSizeBytes);
+  }
+
+  // The output is a ring of raw PCM samples; the writable region runs from
+  // the write offset up to the read offset.
+  RingBuffer ring(memory()->TranslatePhysical(data->output_buffer_ptr),
+                  capacity_bytes);
+  ring.set_read_offset(read_offset_bytes);
+  ring.set_write_offset(write_offset_bytes);
+
+  remaining_subframe_blocks_in_output_buffer_ =
+      static_cast<int32_t>(ring.write_count()) / kOutputBytesPerBlock;
+
+  return ring;
+}
+
+// Runs one decoding pass for this context.
+//
+// Returns false without touching anything when the context is not both
+// enabled and allocated. Otherwise takes the context lock, clears the enabled
+// flag, loads the guest context, alternates Decode()/Consume() while the
+// output ring buffer has room, writes the context back and returns true.
+bool XmaContextNew::Work() {
+  if (!is_enabled() || !is_allocated()) {
+    return false;
+  }
+
+  std::lock_guard<xe_mutex> lock(lock_);
+  set_is_enabled(false);
+
+  auto context_ptr = memory()->TranslateVirtual(guest_ptr());
+  XMA_CONTEXT_DATA data(context_ptr);
+
+  // Nothing to do while the output buffer is not flagged valid. Note that the
+  // context is not stored back on this path.
+  if (!data.output_buffer_valid) {
+    return true;
+  }
+
+  // Also refreshes remaining_subframe_blocks_in_output_buffer_.
+  RingBuffer output_rb = PrepareOutputRingBuffer(&data);
+
+  const int32_t minimum_subframe_decode_count =
+      (data.subframe_decode_count * 2) - 1;
+
+  // We don't have enough space to even make one pass.
+  // Waiting for decoder to return more space.
+  if (minimum_subframe_decode_count >
+      remaining_subframe_blocks_in_output_buffer_) {
+    XELOGD("XmaContext {}: No space for subframe decoding {}/{}!", id(),
+           minimum_subframe_decode_count,
+           remaining_subframe_blocks_in_output_buffer_);
+    data.Store(context_ptr);
+    return true;
+  }
+
+  // Decode() produces a frame only when the previous one has been fully
+  // consumed; Consume() then moves pending blocks into the output ring.
+  while (remaining_subframe_blocks_in_output_buffer_ >=
+         minimum_subframe_decode_count) {
+    XELOGAPU(
+        "XmaContext {}: Write Count: {}, Capacity: {} - {} {} Subframes: {} "
+        "Skip: {}",
+        id(), (uint32_t)output_rb.write_count(),
+        remaining_subframe_blocks_in_output_buffer_,
+        data.input_buffer_0_valid + (data.input_buffer_1_valid << 1),
+        data.output_buffer_valid, data.subframe_decode_count,
+        data.subframe_skip_count);
+
+    Decode(&data);
+    Consume(&output_rb, &data);
+
+    // Stop once the input is exhausted or Decode() flagged error status 4.
+    if (!data.IsAnyInputBufferValid() || data.error_status == 4) {
+      break;
+    }
+  }
+
+  // Convert the ring buffer's byte offset back to the block units stored in
+  // the context.
+  data.output_buffer_write_offset =
+      output_rb.write_offset() / kOutputBytesPerBlock;
+
+  XELOGAPU("XmaContext {}: Read Output: {} Write Output: {}", id(),
+           data.output_buffer_read_offset, data.output_buffer_write_offset);
+
+  // That's a bit misleading due to the nature of a ring buffer:
+  // when the write and read offsets match it might mean that we wrote nothing
+  // or that we fully saturated the allocated space.
+  if (output_rb.empty()) {
+    data.output_buffer_valid = 0;
+  }
+
+  // TODO: Rewrite!
+  // There is a case when the game can modify certain parts of the context
+  // mid-play and the decoder should be aware of it.
+  data.Store(context_ptr);
+  return true;
+}
+
+// Logs the current input position, writes the context back unchanged and
+// flags this context as enabled so that Work() will process it.
+void XmaContextNew::Enable() {
+  std::lock_guard<xe_mutex> guard(lock_);
+
+  auto guest_context = memory()->TranslateVirtual(guest_ptr());
+  XMA_CONTEXT_DATA data(guest_context);
+
+  XELOGAPU("XmaContext: kicking context {} (buffer {} {}/{} bits)", id(),
+           data.current_buffer, data.input_buffer_read_offset,
+           data.GetCurrentInputBufferPacketCount() * kBitsPerPacket);
+
+  data.Store(guest_context);
+  set_is_enabled(true);
+}
+
+// Checks, or waits, for the context lock to be free.
+// With |poll| set this returns false immediately if the lock is currently
+// held; otherwise it waits for the lock. The lock is always released again
+// before returning true.
+bool XmaContextNew::Block(bool poll) {
+  if (lock_.try_lock()) {
+    lock_.unlock();
+    return true;
+  }
+
+  if (poll) {
+    return false;
+  }
+
+  // Wait for the current holder to finish, then let go straight away.
+  lock_.lock();
+  lock_.unlock();
+  return true;
+}
+
+// Resets the guest context: invalidates both input buffers and the output
+// buffer, zeroes the output offsets and rewinds the input read offset to just
+// past the first packet header. Any pending decoded subframes are dropped.
+void XmaContextNew::Clear() {
+  std::lock_guard<xe_mutex> guard(lock_);
+  XELOGAPU("XmaContext: reset context {}", id());
+
+  auto guest_context = memory()->TranslateVirtual(guest_ptr());
+  XMA_CONTEXT_DATA data(guest_context);
+
+  // Nothing is valid any more.
+  data.input_buffer_0_valid = 0;
+  data.input_buffer_1_valid = 0;
+  data.output_buffer_valid = 0;
+
+  // Reading restarts right after the packet header; the output ring is empty.
+  data.input_buffer_read_offset = kBitsPerPacketHeader;
+  data.output_buffer_read_offset = 0;
+  data.output_buffer_write_offset = 0;
+
+  current_frame_remaining_subframes_ = 0;
+  data.Store(guest_context);
+}
+
+// Clears the enabled flag while holding the context lock.
+void XmaContextNew::Disable() {
+  std::lock_guard<xe_mutex> guard(lock_);
+  XELOGAPU("XmaContext: disabling context {}", id());
+  set_is_enabled(false);
+}
+
+// Marks the context as no longer allocated and zeroes its guest-memory
+// structure. Asserts that the context was allocated.
+void XmaContextNew::Release() {
+  // Hold the lock in case the decoder thread is working on this context.
+  std::lock_guard<xe_mutex> guard(lock_);
+  assert_true(is_allocated_ == true);
+
+  set_is_allocated(false);
+
+  // Wipe the guest-visible context structure.
+  auto guest_context = memory()->TranslateVirtual(guest_ptr());
+  std::memset(guest_context, 0, sizeof(XMA_CONTEXT_DATA));
+}
+
+// Converts a sample-rate ID into the matching entry of kIdToSampleRate.
+// IDs outside the table are clamped to its first / last entry so that a
+// negative or oversized ID can never index out of bounds.
+int XmaContextNew::GetSampleRate(int id) {
+  const int table_index = std::max(0, std::min(id, 3));
+  return kIdToSampleRate[table_index];
+}
+
+// Invalidates the input buffer that was being read, switches current_buffer
+// to the other one and resets the read offset to just past the packet header.
+void XmaContextNew::SwapInputBuffer(XMA_CONTEXT_DATA* data) {
+  // Retire whichever buffer was in use; it has no more frames.
+  if (data->current_buffer != 0) {
+    data->input_buffer_1_valid = 0;
+  } else {
+    data->input_buffer_0_valid = 0;
+  }
+
+  data->current_buffer ^= 1;
+  data->input_buffer_read_offset = kBitsPerPacketHeader;
+}
+
+// Copies pending blocks of the decoded frame held in raw_frame_ into
+// |output_rb|, at most data->subframe_decode_count blocks per call, and
+// updates the bookkeeping counters. Does nothing when no decoded blocks are
+// pending.
+void XmaContextNew::Consume(RingBuffer* output_rb, XMA_CONTEXT_DATA* data) {
+  if (!current_frame_remaining_subframes_) {
+    return;
+  }
+
+  const int8_t subframes_to_write =
+      std::min((int8_t)current_frame_remaining_subframes_,
+               (int8_t)data->subframe_decode_count);
+
+  // Number of blocks of raw_frame_ already written out: the total block count
+  // of a frame (4, doubled when is_stereo is set) minus the blocks still
+  // pending.
+  const int8_t raw_frame_read_offset =
+      ((kBytesPerFrameChannel / kOutputBytesPerBlock) << data->is_stereo) -
+      current_frame_remaining_subframes_;
+  // + data->subframe_skip_count;
+
+  output_rb->Write(
+      raw_frame_.data() + (kOutputBytesPerBlock * raw_frame_read_offset),
+      subframes_to_write * kOutputBytesPerBlock);
+  remaining_subframe_blocks_in_output_buffer_ -= subframes_to_write;
+  current_frame_remaining_subframes_ -= subframes_to_write;
+
+  XELOGAPU("XmaContext {}: Consume: {} - {} - {} - {} - {}", id(),
+           remaining_subframe_blocks_in_output_buffer_,
+           data->output_buffer_write_offset, data->output_buffer_read_offset,
+           output_rb->write_offset(), current_frame_remaining_subframes_);
+}
+
+// Decodes at most one XMA frame from the current input buffer into
+// raw_frame_ and advances data->input_buffer_read_offset to the next frame.
+//
+// Returns early without decoding when no input buffer is valid, when blocks
+// of the previously decoded frame are still waiting to be consumed, or when
+// the current buffer, the output block count or the packet index is invalid.
+// A frame that continues into the following packet is reassembled by placing
+// the payloads of both packets back to back in input_buffer_. After the
+// decode attempt current_frame_remaining_subframes_ is set to
+// 4 << is_stereo whether or not DecodePacket() succeeded; on failure
+// raw_frame_ stays zero-filled.
+void XmaContextNew::Decode(XMA_CONTEXT_DATA* data) {
+  SCOPE_profile_cpu_f("apu");
+
+  // No available data.
+  if (!data->IsAnyInputBufferValid()) {
+    // data->error_status = 4;
+    return;
+  }
+
+  // The previous frame has not been fully consumed yet.
+  if (current_frame_remaining_subframes_ > 0) {
+    return;
+  }
+
+  uint8_t* current_input_buffer = GetCurrentInputBuffer(data);
+
+  if (!data->IsCurrentInputBufferValid()) {
+    XELOGE(
+        "XmaContext {}: Invalid current buffer! Selected Buffer: {} Valid: {} "
+        "Pointer: {:08X}",
+        id(), data->current_buffer, data->IsCurrentInputBufferValid(),
+        data->GetCurrentInputBufferAddress());
+    return;
+  }
+
+  input_buffer_.fill(0);
+
+  // May rewind input_buffer_read_offset to the loop start.
+  UpdateLoopStatus(data);
+
+  if (!data->output_buffer_block_count) {
+    XELOGE("XmaContext {}: Error - Received 0 for output_buffer_block_count!",
+           id());
+    return;
+  }
+
+  XELOGAPU(
+      "Processing context {} (offset {}, buffer {}, ptr {:p}, output buffer "
+      "{:08X}, output buffer count {})",
+      id(), data->input_buffer_read_offset, data->current_buffer,
+      current_input_buffer, data->output_buffer_ptr,
+      data->output_buffer_block_count);
+
+  const uint32_t current_input_size = GetCurrentInputBufferSize(data);
+  const uint32_t current_input_packet_count =
+      current_input_size / kBytesPerPacket;
+
+  const int16_t packet_index =
+      GetPacketNumber(current_input_size, data->input_buffer_read_offset);
+
+  if (packet_index == -1) {
+    XELOGE("XmaContext {}: Invalid packet index. Input read offset: {}", id(),
+           data->input_buffer_read_offset);
+    return;
+  }
+
+  uint8_t* packet = current_input_buffer + (packet_index * kBytesPerPacket);
+  // Because the game can reset the read offset, we must ensure that the new
+  // offset is valid. Split frames aren't handled here, so it's not a big deal.
+  const uint32_t frame_offset = xma::GetPacketFrameOffset(packet);
+  if (data->input_buffer_read_offset < frame_offset) {
+    data->input_buffer_read_offset = frame_offset;
+  }
+
+  // Bit position of the frame relative to the start of its packet.
+  const uint32_t relative_offset =
+      data->input_buffer_read_offset % kBitsPerPacket;
+  const kPacketInfo packet_info = GetPacketInfo(packet, relative_offset);
+  const uint32_t packet_to_skip = xma::GetPacketSkipCount(packet) + 1;
+  const uint32_t next_packet_index = packet_index + packet_to_skip;
+
+  // Stream bounded by the end of the current packet, used to find out how
+  // many bits of the frame are available inside this packet.
+  BitStream stream =
+      BitStream(current_input_buffer, (packet_index + 1) * kBitsPerPacket);
+  stream.SetOffset(data->input_buffer_read_offset);
+
+  const uint64_t bits_to_copy = GetAmountOfBitsToRead(
+      (uint32_t)stream.BitsRemaining(), packet_info.current_frame_size_);
+
+  if (bits_to_copy == 0) {
+    XELOGE("XmaContext {}: There is no bits to copy!", id());
+    SwapInputBuffer(data);
+    return;
+  }
+
+  if (packet_info.isLastFrameInPacket()) {
+    // Frame is a split frame: it continues in the next packet.
+    if (stream.BitsRemaining() < packet_info.current_frame_size_) {
+      const uint8_t* next_packet =
+          GetNextPacket(data, next_packet_index, current_input_packet_count);
+
+      if (!next_packet) {
+        // Error path
+        // Decoder probably should return error here
+        // Not sure what error code should be returned
+        data->error_status = 4;
+        return;
+      }
+      // Copy the payload of the next packet into the second half of the
+      // buffer.
+      std::memcpy(input_buffer_.data() + kBytesPerPacketData,
+                  next_packet + kBytesPerPacketHeader, kBytesPerPacketData);
+    }
+  }
+
+  // Copy the payload of the current packet into the first half of the buffer.
+  std::memcpy(input_buffer_.data(), packet + kBytesPerPacketHeader,
+              kBytesPerPacketData);
+
+  // From here on the stream reads the header-less payload(s) in input_buffer_,
+  // so the offset is shifted back by the size of the packet header.
+  stream = BitStream(input_buffer_.data(),
+                     (kBitsPerPacket - kBitsPerPacketHeader) * 2);
+  stream.SetOffset(relative_offset - kBitsPerPacketHeader);
+
+  xma_frame_.fill(0);
+
+  XELOGAPU(
+      "XmaContext {}: Reading Frame {}/{} (size: {}) From Packet "
+      "{}/{}",
+      id(), (int32_t)packet_info.current_frame_, packet_info.frame_count_,
+      packet_info.current_frame_size_, packet_index,
+      current_input_packet_count);
+
+  // Byte 0 of xma_frame_ is reserved for the padding information written by
+  // PreparePacket(), so the frame bits are copied starting at byte 1.
+  const uint32_t padding_start = static_cast<uint8_t>(
+      stream.Copy(xma_frame_.data() + 1, packet_info.current_frame_size_));
+
+  raw_frame_.fill(0);
+
+  // NOTE(review): the return value of PrepareDecoder() is ignored, so a
+  // failure to reopen the codec is only noticed through DecodePacket().
+  PrepareDecoder(data->sample_rate, bool(data->is_stereo));
+  PreparePacket(packet_info.current_frame_size_, padding_start);
+  if (DecodePacket(av_context_, av_packet_, av_frame_)) {
+    // dump_raw(av_frame_, id());
+    ConvertFrame(reinterpret_cast<const uint8_t**>(&av_frame_->data),
+                 bool(data->is_stereo), raw_frame_.data());
+  }
+
+  // TODO: Write function to regenerate decoder
+  // TODO: Be aware of subframe_skips & loops subframes skips
+  current_frame_remaining_subframes_ = 4 << data->is_stereo;
+
+  // Compute where to go next.
+  if (!packet_info.isLastFrameInPacket()) {
+    // More frames follow in this packet: step to the next one.
+    const uint32_t next_frame_offset =
+        (data->input_buffer_read_offset + bits_to_copy) % kBitsPerPacket;
+
+    XELOGAPU("XmaContext {}: Index: {}/{} - Next frame offset: {}", id(),
+             (int32_t)packet_info.current_frame_, packet_info.frame_count_,
+             next_frame_offset);
+
+    data->input_buffer_read_offset =
+        (packet_index * kBitsPerPacket) + next_frame_offset;
+    return;
+  }
+
+  const uint8_t* next_packet =
+      GetNextPacket(data, next_packet_index, current_input_packet_count);
+
+  if (!next_packet) {
+    // Error path
+    // Decoder probably should return error here
+    // Not sure what error code should be returned
+    // data->error_status = 4;
+    // data->output_buffer_valid = 0;
+    // return;
+  }
+
+  uint32_t next_input_offset = GetNextPacketReadOffset(
+      current_input_buffer, next_packet_index, current_input_packet_count);
+
+  // GetNextPacketReadOffset() returns kBitsPerPacketHeader when no further
+  // packet with a frame start exists in the current buffer.
+  if (next_input_offset == kBitsPerPacketHeader) {
+    SwapInputBuffer(data);
+    // We're at the start of the next buffer.
+    // If its first packet has any frame, the decoder should go to the first
+    // frame in that packet. If it doesn't have any frame, it should
+    // immediately go to the next packet.
+    if (data->IsAnyInputBufferValid()) {
+      next_input_offset = xma::GetPacketFrameOffset(
+          memory()->TranslatePhysical(data->GetCurrentInputBufferAddress()));
+
+      if (next_input_offset > kMaxFrameSizeinBits) {
+        XELOGAPU(
+            "XmaContext {}: Next buffer contains no frames in packet! Frame "
+            "offset: {}",
+            id(), next_input_offset);
+        SwapInputBuffer(data);
+        return;
+      }
+      XELOGAPU("XmaContext {}: Next buffer first frame starts at: {}", id(),
+               next_input_offset);
+    }
+  }
+  data->input_buffer_read_offset = next_input_offset;
+  return;
+}
+
+//  Frame & Packet searching methods
+
+// Applies looping: when loops remain and the read offset has reached the loop
+// end, the read offset jumps back to the loop start. The remaining loop count
+// is decremented unless it is 255. Loop start and end are never taken to be
+// smaller than the packet header size.
+void XmaContextNew::UpdateLoopStatus(XMA_CONTEXT_DATA* data) {
+  if (data->loop_count != 0) {
+    const uint32_t loop_begin_bits =
+        std::max(kBitsPerPacketHeader, data->loop_start);
+    const uint32_t loop_end_bits =
+        std::max(kBitsPerPacketHeader, data->loop_end);
+
+    XELOGAPU("XmaContext {}: Looped Data: {} < {} (Start: {}) Remaining: {}",
+             id(), data->input_buffer_read_offset, data->loop_end,
+             data->loop_start, data->loop_count);
+
+    if (data->input_buffer_read_offset == loop_end_bits) {
+      data->input_buffer_read_offset = loop_begin_bits;
+
+      // A loop count of 255 is left untouched.
+      if (data->loop_count != 255) {
+        data->loop_count--;
+      }
+    }
+  }
+}
+
+// Returns a pointer to packet |next_packet_index|. While the index lies inside
+// the current input buffer the packet is taken from there; otherwise the
+// start of the other input buffer is returned. Returns nullptr when the other
+// buffer is not flagged valid or has a null address.
+const uint8_t* XmaContextNew::GetNextPacket(
+    XMA_CONTEXT_DATA* data, uint32_t next_packet_index,
+    uint32_t current_input_packet_count) {
+  const bool stays_in_current_buffer =
+      next_packet_index < current_input_packet_count;
+  if (stays_in_current_buffer) {
+    return memory()->TranslatePhysical(data->GetCurrentInputBufferAddress()) +
+           next_packet_index * kBytesPerPacket;
+  }
+
+  // The packet lives in the other buffer, if there is one.
+  const uint8_t other_buffer_index = data->current_buffer ^ 1;
+  if (!data->IsInputBufferValid(other_buffer_index)) {
+    return nullptr;
+  }
+
+  const uint32_t other_buffer_address =
+      data->GetInputBufferAddress(other_buffer_index);
+  if (other_buffer_address != 0) {
+    return memory()->TranslatePhysical(other_buffer_address);
+  }
+
+  // This should never occur but there is always a chance.
+  XELOGE(
+      "XmaContext {}: Buffer is marked as valid, but doesn't have valid "
+      "pointer!",
+      id());
+  return nullptr;
+}
+
+// Finds the bit offset, relative to |buffer|, of the first frame start at or
+// after packet |next_packet_index|. Packets whose frame offset exceeds
+// kMaxFrameSizeinBits are skipped. Returns kBitsPerPacketHeader when no
+// packet up to |current_input_packet_count| qualifies.
+const uint32_t XmaContextNew::GetNextPacketReadOffset(
+    uint8_t* buffer, uint32_t next_packet_index,
+    uint32_t current_input_packet_count) {
+  for (uint32_t candidate_index = next_packet_index;
+       candidate_index < current_input_packet_count; ++candidate_index) {
+    uint8_t* candidate = buffer + (candidate_index * kBytesPerPacket);
+    const uint32_t packet_frame_offset = xma::GetPacketFrameOffset(candidate);
+
+    // Try the following packet when this one is skipped.
+    if (packet_frame_offset > kMaxFrameSizeinBits) {
+      continue;
+    }
+
+    const uint32_t new_input_buffer_offset =
+        (candidate_index * kBitsPerPacket) + packet_frame_offset;
+
+    XELOGAPU("XmaContext {}: new offset: {} packet_offset: {} packet: {}/{}",
+             id(), new_input_buffer_offset, packet_frame_offset,
+             candidate_index, current_input_packet_count);
+    return new_input_buffer_offset;
+  }
+
+  return kBitsPerPacketHeader;
+}
+
+// Returns the smaller of the bits left in the stream and the frame size.
+const uint32_t XmaContextNew::GetAmountOfBitsToRead(
+    const uint32_t remaining_stream_bits, const uint32_t frame_size) {
+  if (frame_size < remaining_stream_bits) {
+    return frame_size;
+  }
+  return remaining_stream_bits;
+}
+
+// Size in bytes of the current input buffer: its packet count times the
+// packet size.
+uint32_t XmaContextNew::GetCurrentInputBufferSize(XMA_CONTEXT_DATA* data) {
+  const uint32_t packet_count = data->GetCurrentInputBufferPacketCount();
+  return packet_count * kBytesPerPacket;
+}
+
+// Translates the address of the current input buffer into a host pointer.
+uint8_t* XmaContextNew::GetCurrentInputBuffer(XMA_CONTEXT_DATA* data) {
+  const uint32_t buffer_address = data->GetCurrentInputBufferAddress();
+  return memory()->TranslatePhysical(buffer_address);
+}
+
+// Walks the frame headers of |packet| and reports how many frames start in
+// it, together with the index and size of the frame located at |frame_offset|
+// (a bit offset relative to the start of the packet).
+//
+// When |frame_offset| lies before the packet's first frame offset, the data
+// there is reported as frame 0 whose size is the number of bits up to the
+// first frame. For XMA2 packets the frame count found is compared with the
+// count stored in the packet header and a mismatch is logged.
+const kPacketInfo XmaContextNew::GetPacketInfo(uint8_t* packet,
+                                               uint32_t frame_offset) {
+  kPacketInfo packet_info = {};
+
+  const uint32_t first_frame_offset = xma::GetPacketFrameOffset(packet);
+  BitStream stream(packet, kBitsPerPacket);
+  stream.SetOffset(first_frame_offset);
+
+  // Handling of a split frame.
+  if (frame_offset < first_frame_offset) {
+    packet_info.current_frame_ = 0;
+    packet_info.current_frame_size_ = first_frame_offset - frame_offset;
+  }
+
+  while (true) {
+    if (stream.BitsRemaining() < kBitsPerFrameHeader) {
+      break;
+    }
+
+    const uint64_t frame_size = stream.Peek(kBitsPerFrameHeader);
+    if (frame_size == xma::kMaxFrameLength) {
+      break;
+    }
+
+    // A zero-length header can only come from corrupt or zeroed data. The
+    // scan could not move forward past it (frame_size - 1 below would wrap
+    // around), so stop instead of revisiting the same header.
+    if (frame_size == 0) {
+      break;
+    }
+
+    if (stream.offset_bits() == frame_offset) {
+      packet_info.current_frame_ = packet_info.frame_count_;
+      packet_info.current_frame_size_ = (uint32_t)frame_size;
+    }
+
+    packet_info.frame_count_++;
+
+    if (frame_size > stream.BitsRemaining()) {
+      // Last frame; it continues past the end of this packet.
+      break;
+    }
+
+    // The header was only peeked, so skipping frame_size - 1 bits lands on the
+    // final bit of the frame.
+    stream.Advance(frame_size - 1);
+
+    // Read the trailing bit to see if frames follow.
+    if (stream.Read(1) == 0) {
+      break;
+    }
+  }
+
+  if (xma::IsPacketXma2Type(packet)) {
+    const uint8_t xma2_frame_count = xma::GetPacketFrameCount(packet);
+    if (xma2_frame_count != packet_info.frame_count_) {
+      XELOGE(
+          "XmaContext {}: XMA2 packet header defines different amount of "
+          "frames than internally found! (Header: {} Found: {})",
+          id(), xma2_frame_count, packet_info.frame_count_);
+    }
+  }
+  return packet_info;
+}
+
+// Returns the index of the packet containing |bit_offset| in a buffer of
+// |size| bytes. Asserts and returns -1 when the offset points into the first
+// packet header or lies at or beyond the end of the buffer.
+int16_t XmaContextNew::GetPacketNumber(size_t size, size_t bit_offset) {
+  const bool inside_first_header = bit_offset < kBitsPerPacketHeader;
+  if (inside_first_header || bit_offset >= (size << 3)) {
+    assert_always();
+    return -1;
+  }
+
+  const size_t containing_byte = bit_offset >> 3;
+  return (int16_t)(containing_byte / kBytesPerPacket);
+}
+
+// Makes sure the codec context matches the requested sample rate (given as a
+// sample-rate ID) and channel count, reopening the codec when either differs.
+// Returns 0 when nothing had to change, 1 after a successful reopen and -1
+// when reopening failed.
+int XmaContextNew::PrepareDecoder(int sample_rate, bool is_two_channel) {
+  sample_rate = GetSampleRate(sample_rate);
+  uint32_t channels = is_two_channel ? 2 : 1;
+
+  const bool already_configured = av_context_->sample_rate == sample_rate &&
+                                  av_context_->channels == channels;
+  if (already_configured) {
+    return 0;
+  }
+
+  // We have to reopen the codec so it'll realloc whatever data it needs.
+  // TODO(DrChat): Find a better way.
+  avcodec_close(av_context_);
+
+  av_context_->sample_rate = sample_rate;
+  av_context_->channels = channels;
+
+  if (avcodec_open2(av_context_, av_codec_, nullptr) < 0) {
+    XELOGE("XmaContext: Failed to reopen FFmpeg context");
+    return -1;
+  }
+  return 1;
+}
+
+// Points av_packet_ at xma_frame_ and sizes it to cover the one-byte prefix
+// plus |frame_padding| + |frame_size| bits rounded up to whole bytes. The
+// prefix byte stores the leading padding in bits 5-7 and the trailing padding
+// in bits 2-4.
+void XmaContextNew::PreparePacket(const uint32_t frame_size,
+                                  const uint32_t frame_padding) {
+  const uint32_t payload_bits = frame_padding + frame_size;
+  const uint32_t whole_bytes = payload_bits / 8;
+  const uint32_t partial_byte = (payload_bits % 8) ? 1 : 0;
+
+  av_packet_->data = xma_frame_.data();
+  av_packet_->size = static_cast<int>(1 + whole_bytes + partial_byte);
+
+  // Unused bits left after the frame in the final byte.
+  auto padding_end = av_packet_->size * 8 - (8 + payload_bits);
+  assert_true(padding_end < 8);
+  xma_frame_[0] = ((frame_padding & 7) << 5) | ((padding_end & 7) << 2);
+}
+
+// Sends |av_packet| to the decoder and fetches one decoded frame into
+// |av_frame|. Returns false, after logging, if either step reports an error.
+bool XmaContextNew::DecodePacket(AVCodecContext* av_context,
+                                 const AVPacket* av_packet, AVFrame* av_frame) {
+  if (avcodec_send_packet(av_context, av_packet) < 0) {
+    XELOGE("XmaContext {}: Error sending packet for decoding", id());
+    return false;
+  }
+
+  if (avcodec_receive_frame(av_context, av_frame) < 0) {
+    XELOGE("XmaContext {}: Error during decoding", id());
+    return false;
+  }
+
+  return true;
+}
+
+}  // namespace apu
+}  // namespace xe
--- a/src/xenia/apu/xma_context_new.h
+++ b/src/xenia/apu/xma_context_new.h
@ -0,0 +1,136 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2024 Xenia Canary. All rights reserved.                          *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_APU_XMA_CONTEXT_NEW_H_
+#define XENIA_APU_XMA_CONTEXT_NEW_H_
+
+#include <array>
+#include <atomic>
+#include <mutex>
+#include <queue>
+
+#include "xenia/apu/xma_context.h"
+#include "xenia/base/bit_stream.h"
+#include "xenia/base/ring_buffer.h"
+#include "xenia/memory.h"
+#include "xenia/xbox.h"
+
+// Forward declarations
+struct AVCodec;
+struct AVCodecParserContext;
+struct AVCodecContext;
+struct AVFrame;
+struct AVPacket;
+
+namespace xe {
+namespace apu {
+
+// Summary of one packet as seen from a given frame offset.
+struct kPacketInfo {
+  // Number of frames that start in the packet.
+  uint8_t frame_count_;
+  // Index of the frame at the queried offset.
+  uint8_t current_frame_;
+  // Size in bits of that frame.
+  uint32_t current_frame_size_;
+
+  // True when the current frame is the final one counted in the packet.
+  const bool isLastFrameInPacket() const {
+    return current_frame_ + 1 == frame_count_;
+  }
+};
+
+// Sample rates indexed by sample-rate ID; XmaContextNew::GetSampleRate() does
+// the lookup.
+static constexpr int kIdToSampleRate[4] = {24000, 32000, 44100, 48000};
+
+// XMA context backed by the new decoder implementation. Overrides the
+// lifecycle entry points of XmaContext; decoding is done one frame at a time
+// through FFmpeg and the result is handed to the guest output ring buffer in
+// 256-byte blocks.
+class XmaContextNew : public XmaContext {
+ public:
+  // Packet layout.
+  static const uint32_t kBytesPerPacket = 2048;
+  static const uint32_t kBytesPerPacketHeader = 4;
+  static const uint32_t kBytesPerPacketData =
+      kBytesPerPacket - kBytesPerPacketHeader;
+
+  static const uint32_t kBitsPerPacket = kBytesPerPacket * 8;
+  static const uint32_t kBitsPerPacketHeader = 32;
+  static const uint32_t kBitsPerFrameHeader = 15;
+
+  // Decoded PCM layout.
+  static const uint32_t kBytesPerSample = 2;
+  static const uint32_t kSamplesPerFrame = 512;
+  static const uint32_t kSamplesPerSubframe = 128;
+  static const uint32_t kBytesPerFrameChannel =
+      kSamplesPerFrame * kBytesPerSample;
+  static const uint32_t kBytesPerSubframeChannel =
+      kSamplesPerSubframe * kBytesPerSample;
+
+  // Output ring buffer granularity and the largest size expected for it.
+  static const uint32_t kOutputBytesPerBlock = 256;
+  static const uint32_t kOutputMaxSizeBytes = 31 * kOutputBytesPerBlock;
+
+  static const uint32_t kLastFrameMarker = 0x7FFF;
+  static const uint32_t kMaxFrameSizeinBits = 0x4000 - kBitsPerPacketHeader;
+
+  explicit XmaContextNew();
+  ~XmaContextNew();
+
+  // XmaContext overrides. `override` makes the compiler verify that each
+  // signature really matches the virtual it is meant to replace.
+  int Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) override;
+  bool Work() override;
+
+  void Enable() override;
+  bool Block(bool poll) override;
+  void Clear() override;
+  void Disable() override;
+  void Release() override;
+
+ private:
+  static void SwapInputBuffer(XMA_CONTEXT_DATA* data);
+  // Convert sampling rate from ID to frequency.
+  static int GetSampleRate(int id);
+  // Get the containing packet number of the frame pointed to by the offset.
+  static int16_t GetPacketNumber(size_t size, size_t bit_offset);
+
+  // Get the frame count of the packet and the index/size of the frame at the
+  // given packet-relative bit offset.
+  const kPacketInfo GetPacketInfo(uint8_t* packet, uint32_t frame_offset);
+
+  // Returns the smaller of the two values.
+  const uint32_t GetAmountOfBitsToRead(const uint32_t remaining_stream_bits,
+                                       const uint32_t frame_size);
+
+  // Returns the packet at the given index, falling back to the start of the
+  // other input buffer; nullptr when that buffer is unavailable.
+  const uint8_t* GetNextPacket(XMA_CONTEXT_DATA* data,
+                               uint32_t next_packet_index,
+                               uint32_t current_input_packet_count);
+
+  // Returns the buffer-relative bit offset of the next frame start, or
+  // kBitsPerPacketHeader when the current buffer has none left.
+  const uint32_t GetNextPacketReadOffset(uint8_t* buffer,
+                                         uint32_t next_packet_index,
+                                         uint32_t current_input_packet_count);
+
+  // Returns currently used buffer
+  uint8_t* GetCurrentInputBuffer(XMA_CONTEXT_DATA* data);
+
+  // Returns the size in bytes of the currently used buffer.
+  static uint32_t GetCurrentInputBufferSize(XMA_CONTEXT_DATA* data);
+
+  // Decode() fills raw_frame_ with one frame; Consume() moves its blocks into
+  // the output ring buffer.
+  void Decode(XMA_CONTEXT_DATA* data);
+  void Consume(RingBuffer* output_rb, XMA_CONTEXT_DATA* data);
+
+  void UpdateLoopStatus(XMA_CONTEXT_DATA* data);
+  int PrepareDecoder(int sample_rate, bool is_two_channel);
+  void PreparePacket(const uint32_t frame_size, const uint32_t frame_padding);
+
+  RingBuffer PrepareOutputRingBuffer(XMA_CONTEXT_DATA* data);
+
+  bool DecodePacket(AVCodecContext* av_context, const AVPacket* av_packet,
+                    AVFrame* av_frame);
+
+  // This method should be used ONLY when we're at the last packet of the stream
+  // and we want to find offset in next buffer
+  // NOTE(review): no definition is visible in xma_context_new.cc - confirm
+  // whether this declaration is still needed.
+  uint32_t GetPacketFirstFrameOffset(const XMA_CONTEXT_DATA* data);
+
+  // Payloads of the current packet and, for split frames, the next packet,
+  // stored back to back.
+  std::array<uint8_t, kBytesPerPacketData * 2> input_buffer_;
+  // first byte contains bit offset information
+  std::array<uint8_t, 1 + 4096> xma_frame_;
+  // Converted PCM of the most recently decoded frame (up to two channels).
+  std::array<uint8_t, kBytesPerFrameChannel * 2> raw_frame_;
+
+  // Writable 256-byte blocks left in the output ring buffer.
+  int32_t remaining_subframe_blocks_in_output_buffer_ = 0;
+  // Blocks of raw_frame_ not yet written to the output ring buffer.
+  uint8_t current_frame_remaining_subframes_ = 0;
+};
+
+}  // namespace apu
+}  // namespace xe
+
+#endif  // XENIA_APU_XMA_CONTEXT_NEW_H_
--- a/src/xenia/apu/xma_context_old.cc
+++ b/src/xenia/apu/xma_context_old.cc
@ -0,0 +1,950 @@
+/**
+******************************************************************************
+* Xenia : Xbox 360 Emulator Research Project                                 *
+******************************************************************************
+* Copyright 2024 Ben Vanik. All rights reserved.                             *
+* Released under the BSD license - see LICENSE in the root for more details. *
+******************************************************************************
+*/
+
+#include "xenia/apu/xma_context_old.h"
+
+#include <algorithm>
+#include <cstring>
+
+#include "xenia/apu/xma_decoder.h"
+#include "xenia/apu/xma_helpers.h"
+#include "xenia/base/bit_stream.h"
+#include "xenia/base/logging.h"
+#include "xenia/base/platform.h"
+#include "xenia/base/profiling.h"
+#include "xenia/base/ring_buffer.h"
+
+extern "C" {
+#if XE_COMPILER_MSVC
+#pragma warning(push)
+#pragma warning(disable : 4101 4244 5033)
+#endif
+#include "third_party/FFmpeg/libavcodec/avcodec.h"
+#if XE_COMPILER_MSVC
+#pragma warning(pop)
+#endif
+}  // extern "C"
+
+// Credit for most of this code goes to:
+// https://github.com/koolkdev/libertyv/blob/master/libav_wrapper/xma2dec.c
+
+namespace xe {
+namespace apu {
+
+XmaContextOld::XmaContextOld() = default;  // Allocation happens in Setup().
+
+XmaContextOld::~XmaContextOld() {
+  // Release every FFmpeg object allocated by Setup().
+  if (av_context_) {
+    // Closes the codec if it is open and frees what the context owns; a
+    // bare av_free() of the struct would leak those allocations.
+    avcodec_free_context(&av_context_);
+  }
+  if (av_frame_) {
+    av_frame_free(&av_frame_);
+  }
+  if (av_packet_) {
+    av_packet_free(&av_packet_);  // Also drops the buffer from Setup().
+  }
+}
+
+// Binds this context to its guest-memory block and allocates the FFmpeg
+// packet, codec context and frame used for decoding.
+//   id:        context index, stored and used in log messages.
+//   memory:    memory object later used to translate guest addresses.
+//   guest_ptr: guest address of this context's data block.
+// Returns 0 on success, 1 if the codec is missing or an allocation fails.
+int XmaContextOld::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
+  id_ = id;
+  memory_ = memory;
+  guest_ptr_ = guest_ptr;
+
+  // Allocate ffmpeg stuff:
+  av_packet_ = av_packet_alloc();
+  if (!av_packet_) {
+    // Handled like the other allocation failures below instead of relying on
+    // an assert before the packet is dereferenced.
+    XELOGE("XmaContext {}: Couldn't allocate packet", id);
+    return 1;
+  }
+  // chrispy: preallocate this buffer so that ffmpeg isn't reallocating it for
+  // every packet, these allocations were causing RtlSubsegmentInitialize
+  av_packet_->buf = av_buffer_alloc(128 * 1024);
+  // find the XMA2 audio decoder
+  av_codec_ = avcodec_find_decoder(AV_CODEC_ID_XMAFRAMES);
+  if (!av_codec_) {
+    XELOGE("XmaContext {}: Codec not found", id);
+    return 1;
+  }
+
+  av_context_ = avcodec_alloc_context3(av_codec_);
+  if (!av_context_) {
+    XELOGE("XmaContext {}: Couldn't allocate context", id);
+    return 1;
+  }
+
+  // Initialize these to 0. They'll actually be set later.
+  av_context_->channels = 0;
+  av_context_->sample_rate = 0;
+
+  av_frame_ = av_frame_alloc();
+  if (!av_frame_) {
+    XELOGE("XmaContext {}: Couldn't allocate frame", id);
+    return 1;
+  }
+
+  // FYI: We're purposely not opening the codec here. That is done later.
+  return 0;
+}
+
+// Runs one decode pass for this context if it is both enabled and allocated.
+// Returns false when there was nothing to do and true after a pass.
+bool XmaContextOld::Work() {
+  if (!(is_enabled() && is_allocated())) {
+    return false;
+  }
+
+  std::lock_guard<xe_mutex> guard(lock_);
+  // The context is marked as not enabled before decoding; Enable() sets the
+  // flag again.
+  set_is_enabled(false);
+
+  // Load the guest context, decode, and write the updated state back.
+  auto guest_context = memory()->TranslateVirtual(guest_ptr());
+  XMA_CONTEXT_DATA context_data(guest_context);
+  Decode(&context_data);
+  context_data.Store(guest_context);
+  return true;
+}
+
+// Logs the current input position of the guest context and marks this context
+// as enabled.
+void XmaContextOld::Enable() {
+  std::lock_guard<xe_mutex> guard(lock_);
+
+  auto guest_context = memory()->TranslateVirtual(guest_ptr());
+  XMA_CONTEXT_DATA context_data(guest_context);
+
+  // The last value logged is the size in bits of the buffer currently in use.
+  XELOGAPU("XmaContext: kicking context {} (buffer {} {}/{} bits)", id(),
+           context_data.current_buffer, context_data.input_buffer_read_offset,
+           (context_data.current_buffer != 0
+                ? context_data.input_buffer_1_packet_count
+                : context_data.input_buffer_0_packet_count) *
+               kBitsPerPacket);
+
+  // No field was modified above; the context is stored back as loaded.
+  context_data.Store(guest_context);
+
+  set_is_enabled(true);
+}
+
+// Waits until the context lock can be taken, then releases it again.
+// With poll set the call never waits: it returns false if the lock is held.
+// Returns true once the lock has been acquired and released.
+bool XmaContextOld::Block(bool poll) {
+  if (lock_.try_lock()) {
+    lock_.unlock();
+    return true;
+  }
+  if (poll) {
+    return false;
+  }
+  // Wait for the current holder, then let go immediately.
+  lock_.lock();
+  lock_.unlock();
+  return true;
+}
+
+// Resets the buffer state stored in the guest context and the host-side
+// split-frame state of this decoder.
+void XmaContextOld::Clear() {
+  std::lock_guard<xe_mutex> guard(lock_);
+  XELOGAPU("XmaContext: reset context {}", id());
+
+  auto guest_context = memory()->TranslateVirtual(guest_ptr());
+  XMA_CONTEXT_DATA context_data(guest_context);
+
+  // Mark the output buffer and both input buffers as not valid.
+  context_data.output_buffer_valid = 0;
+  context_data.input_buffer_1_valid = 0;
+  context_data.input_buffer_0_valid = 0;
+
+  // Rewind every read/write position.
+  context_data.output_buffer_write_offset = 0;
+  context_data.output_buffer_read_offset = 0;
+  context_data.input_buffer_read_offset = 0;
+
+  // Forget any partially assembled frame.
+  split_frame_padding_start_ = 0;
+  split_frame_len_partial_ = 0;
+  split_frame_len_ = 0;
+  xma_frame_.fill(0);
+
+  context_data.Store(guest_context);
+}
+
+// Clears the enabled flag while holding the context lock.
+void XmaContextOld::Disable() {
+  std::lock_guard<xe_mutex> guard(lock_);
+
+  XELOGAPU("XmaContext: disabling context {}", id());
+
+  set_is_enabled(false);
+}
+
+// Marks the context as no longer allocated and zeroes its guest-memory block.
+void XmaContextOld::Release() {
+  // Taking the lock makes us wait for a decoder thread that may be working on
+  // this context right now.
+  std::lock_guard<xe_mutex> guard(lock_);
+  assert_true(is_allocated_ == true);
+
+  set_is_allocated(false);
+
+  // Wipe the guest-visible context data.
+  auto guest_context = memory()->TranslateVirtual(guest_ptr());
+  std::memset(guest_context, 0, sizeof(XMA_CONTEXT_DATA));
+}
+
+// Marks the input buffer currently in use as not valid, switches to the other
+// buffer and resets the read offset to kBitsPerHeader.
+void XmaContextOld::SwapInputBuffer(XMA_CONTEXT_DATA* data) {
+  // The buffer we are leaving has no more frames.
+  if (data->current_buffer != 0) {
+    data->input_buffer_1_valid = 0;
+  } else {
+    data->input_buffer_0_valid = 0;
+  }
+  data->input_buffer_read_offset = kBitsPerHeader;
+  data->current_buffer ^= 1;
+}
+
+// Rewinds the input read offset to loop_start when another loop iteration is
+// due. A loop is due when loop_count is non-zero, loop_start lies before
+// loop_end, and either the read offset has reached loop_end or
+// ignore_input_buffer_offset is set. Returns true if the offset was rewound.
+bool XmaContextOld::TrySetupNextLoop(XMA_CONTEXT_DATA* data,
+                                     bool ignore_input_buffer_offset) {
+  // TODO(Pseudo-Kernel): Need to handle loop in the following cases.
+  // 1. loop_start == loop_end == 0
+  // 2. loop_start > loop_end && loop_count > 0
+  const bool has_loop =
+      data->loop_count > 0 && data->loop_start < data->loop_end;
+  if (!has_loop) {
+    return false;
+  }
+
+  const bool at_loop_end = ignore_input_buffer_offset ||
+                           data->input_buffer_read_offset >= data->loop_end;
+  if (!at_loop_end) {
+    return false;
+  }
+
+  // Jump back to the start of the loop.
+  data->input_buffer_read_offset = data->loop_start;
+  // A count of 255 is left untouched, so such a loop never runs out.
+  if (data->loop_count < 255) {
+    data->loop_count--;
+  }
+  return true;
+}
+
+/*
+void XmaContext::NextPacket(
+    uint8_t* input_buffer,
+    uint32_t input_size,
+    uint32_t input_buffer_read_offset) {
+*/
+void XmaContextOld::NextPacket(XMA_CONTEXT_DATA* data) {
+  // No-op stub; |data| is unused. Only this commented-out sketch exists:
+  //   auto packet_idx = GetFramePacketNumber(input_buffer, input_size,
+  //                                          input_buffer_read_offset);
+  //   packet_idx++;
+  //   if (packet_idx++ >= input_size)
+}
+
+// Maps a sample rate id (0-3) to its rate in Hz. Any other id trips
+// assert_always() and yields 0.
+int XmaContextOld::GetSampleRate(int id) {
+  // Indexed by id: 24 kHz, 32 kHz, 44.1 kHz, 48 kHz.
+  static constexpr int kSampleRates[] = {24000, 32000, 44100, 48000};
+  if (id >= 0 && id <= 3) {
+    return kSampleRates[id];
+  }
+  assert_always();
+  return 0;
+}
+
+// Reports whether frame_offset_bits points at the first bit of a frame.
+// Only the packet containing the offset is examined: starting at that packet's
+// first frame, the frame lengths are followed until the offset is reached
+// (true) or no further frame can be walked (false).
+//   block:             start of the input buffer.
+//   size_bytes:        size of the input buffer in bytes.
+//   frame_offset_bits: bit offset into the buffer to validate.
+bool XmaContextOld::ValidFrameOffset(uint8_t* block, size_t size_bytes,
+                                     size_t frame_offset_bits) {
+  uint32_t packet_index =
+      GetFramePacketNumber(block, size_bytes, frame_offset_bits);
+  // GetFramePacketNumber() signals failure with int -1, which is stored here
+  // as the largest uint32_t; the comparison with -1 matches exactly that.
+  if (packet_index == -1) {
+    XELOGAPU("ValidFrameOffset: Invalid packet number");
+    return false;
+  }
+
+  uint8_t* packet_start = block + (packet_index * kBytesPerPacket);
+  uint32_t first_frame_bits = xma::GetPacketFrameOffset(packet_start);
+  if (first_frame_bits == -1 || first_frame_bits > kBitsPerPacket) {
+    // Packet only contains a partial frame, so no frames can start here.
+    XELOGAPU("ValidFrameOffset: Invalid frame offset {}", first_frame_bits);
+    return false;
+  }
+
+  // The target offset, relative to the start of its packet.
+  size_t target_bits = frame_offset_bits % kBitsPerPacket;
+
+  BitStream reader(packet_start, kBitsPerPacket);
+  reader.SetOffset(first_frame_bits);
+  for (;;) {
+    if (reader.offset_bits() == target_bits) {
+      return true;
+    }
+    if (reader.BitsRemaining() < 15) {
+      // Not enough room for another frame header.
+      XELOGAPU("ValidFrameOffset: No room for next frame header {}",
+               first_frame_bits);
+      return false;
+    }
+
+    // 15-bit frame length; it covers the length field and the trailing bit.
+    uint64_t frame_bits = reader.Read(15);
+    if ((frame_bits - 15) > reader.BitsRemaining()) {
+      // Last frame.
+      XELOGAPU("ValidFrameOffset: Last frame {} - {}", first_frame_bits,
+               frame_bits);
+      return false;
+    }
+    if (frame_bits == 0x7FFF) {
+      // Invalid frame (and last of this packet)
+      return false;
+    }
+
+    reader.Advance(frame_bits - 16);
+    // The trailing bit tells whether another frame follows.
+    if (reader.Read(1) == 0) {
+      return false;
+    }
+  }
+}
+// Decodes frames from the current input buffer into the output ring buffer.
+void XmaContextOld::Decode(XMA_CONTEXT_DATA* data) {
+  SCOPE_profile_cpu_f("apu");
+
+  // What I see:
+  // XMA outputs 2 bytes per sample
+  // 512 samples per frame (128 per subframe)
+  // Max output size is data.output_buffer_block_count * 256
+
+  // This decoder is fed packets (max 4095 per buffer)
+  // Packets contain "some" frames
+  // 32bit header (big endian)
+
+  // Frames are the smallest thing the SPUs can decode.
+  // They can and usually will span packets.
+
+  // Sample rates (data.sample_rate):
+  // 0 - 24 kHz
+  // 1 - 32 kHz
+  // 2 - 44.1 kHz
+  // 3 - 48 kHz
+
+  // SPUs also support stereo decoding. (data.is_stereo)
+
+  // Check the output buffer - we cannot decode anything else if it's
+  // unavailable.
+  if (!data->output_buffer_valid) {
+    return;
+  }
+
+  // No available data.
+  if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
+    return;
+  }
+
+  // XAudio Loops
+  // loop_count:
+  //  - XAUDIO2_MAX_LOOP_COUNT = 254
+  //  - XAUDIO2_LOOP_INFINITE = 255
+  // loop_start/loop_end are bit offsets to a specific frame
+
+  // Translate pointers for future use.
+  // Sometimes the game will use rolling input buffers. If they do, we cannot
+  // assume they form a complete block! In addition, the buffers DO NOT have
+  // to be contiguous!
+  uint8_t* in0 = data->input_buffer_0_valid
+                     ? memory()->TranslatePhysical(data->input_buffer_0_ptr)
+                     : nullptr;
+  uint8_t* in1 = data->input_buffer_1_valid
+                     ? memory()->TranslatePhysical(data->input_buffer_1_ptr)
+                     : nullptr;
+  uint8_t* current_input_buffer = data->current_buffer ? in1 : in0;
+
+  if (!current_input_buffer) {
+    XELOGE("XmaContext {}: Error - input buffer pointer is invalid!", id());
+    return;
+  }
+
+  if (!data->output_buffer_block_count) {
+    XELOGE("XmaContext {}: Error - Received 0 for output_buffer_block_count!",
+           id());
+    return;
+  }
+
+  XELOGAPU(
+      "Processing context {} (offset {}, buffer {}, ptr {:p}, output buffer "
+      "{:08X}, output buffer count {})",
+      id(), data->input_buffer_read_offset, data->current_buffer,
+      current_input_buffer, data->output_buffer_ptr,
+      data->output_buffer_block_count);
+  // A buffer that ran out in non-streaming mode is swapped out on this call.
+  if (is_stream_done_) {
+    is_stream_done_ = false;
+    packets_skip_ = 0;
+    SwapInputBuffer(data);
+    return;
+  }
+
+  size_t input_buffer_0_size =
+      data->input_buffer_0_packet_count * kBytesPerPacket;
+  size_t input_buffer_1_size =
+      data->input_buffer_1_packet_count * kBytesPerPacket;
+
+  size_t current_input_size =
+      data->current_buffer ? input_buffer_1_size : input_buffer_0_size;
+  size_t current_input_packet_count = current_input_size / kBytesPerPacket;
+  bool is_streaming = data->input_buffer_0_packet_count == 1 &&
+                      data->input_buffer_1_packet_count == 1;
+
+  // Output buffers are in raw PCM samples, 256 bytes per block.
+  // Output buffer is a ring buffer. We need to write from the write offset
+  // to the read offset.
+  uint8_t* output_buffer = memory()->TranslatePhysical(data->output_buffer_ptr);
+  uint32_t output_capacity =
+      data->output_buffer_block_count * kBytesPerSubframeChannel;
+  uint32_t output_read_offset =
+      data->output_buffer_read_offset * kBytesPerSubframeChannel;
+  uint32_t output_write_offset =
+      data->output_buffer_write_offset * kBytesPerSubframeChannel;
+
+  RingBuffer output_rb(output_buffer, output_capacity);
+  output_rb.set_read_offset(output_read_offset);
+  output_rb.set_write_offset(output_write_offset);
+
+  // We can only decode an entire frame and write it out at a time, so
+  // don't save any samples.
+  // TODO(JoelLinn): subframes when looping
+  size_t output_remaining_bytes = output_rb.write_count();
+  output_remaining_bytes -=
+      output_remaining_bytes % (kBytesPerFrameChannel << data->is_stereo);
+
+  // is_dirty_ = true; // TODO
+  // is_dirty_ = false;  // TODO
+  assert_false(data->stop_when_done);
+  assert_false(data->interrupt_when_done);
+  static int total_samples = 0;  // NOTE(review): one counter for all contexts.
+  // Decode until we can't write any more data.
+  while (output_remaining_bytes > 0) {
+    if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
+      // Out of data.
+      break;
+    }
+    // Setup the input buffer if we are at loop_end.
+    // The input buffer must not be swapped out until all loops are processed.
+    bool reuse_input_buffer = TrySetupNextLoop(data, false);
+
+    // assert_true(packets_skip_ == 0);
+    // assert_true(split_frame_len_ == 0);
+    // assert_true(split_frame_len_partial_ == 0);
+
+    // Where are we in the buffer (in XMA jargon)
+    int packet_idx, frame_idx, frame_count;
+    uint8_t* packet;
+    bool frame_last_split;
+
+    BitStream stream(current_input_buffer, current_input_size * 8);
+    stream.SetOffset(data->input_buffer_read_offset);
+    // NOTE(review): SetOffset() above runs before this bounds check.
+    if (data->input_buffer_read_offset > current_input_size * 8) {
+      XELOGE(
+          "XmaContext {}: Error - Provided input offset exceed input buffer "
+          "size! ({} > {})",
+          id(), data->input_buffer_read_offset, current_input_size * 8);
+      SwapInputBuffer(data);
+      return;
+    }
+    // if we had a buffer swap try to skip packets first
+    if (packets_skip_ > 0) {
+      packet_idx =
+          GetFramePacketNumber(current_input_buffer, current_input_size,
+                               data->input_buffer_read_offset);
+      while (packets_skip_ > 0) {
+        packets_skip_--;
+        packet_idx++;  // NOTE(review): the check below uses >, others use >=.
+        if (packet_idx > current_input_packet_count) {
+          if (!reuse_input_buffer) {
+            // Last packet. Try setup once more.
+            reuse_input_buffer = TrySetupNextLoop(data, true);
+          }
+          if (!reuse_input_buffer) {
+            if (is_streaming) {
+              SwapInputBuffer(data);
+            } else {
+              is_stream_done_ = true;
+            }
+          }
+          return;
+        }
+      }
+      // invalid frame pointer but needed for us
+      data->input_buffer_read_offset = packet_idx * kBitsPerPacket;
+      // continue;
+    }
+    // Either finish a frame split across packets or start a new frame.
+    if (split_frame_len_) {
+      // handle a frame that was split over two packets
+      packet_idx =
+          GetFramePacketNumber(current_input_buffer, current_input_size,
+                               data->input_buffer_read_offset);
+      packet = current_input_buffer + packet_idx * kBytesPerPacket;
+      std::tie(frame_count, frame_last_split) = GetPacketFrameCount(packet);
+      frame_idx = -1;
+
+      stream =
+          BitStream(current_input_buffer, (packet_idx + 1) * kBitsPerPacket);
+      stream.SetOffset(packet_idx * kBitsPerPacket + kBitsPerHeader);
+
+      if (split_frame_len_ > xma::kMaxFrameLength) {
+        // TODO write CopyPeekMethod
+        auto offset = stream.offset_bits();
+        stream.Copy(
+            xma_frame_.data() + 1 +
+                ((split_frame_len_partial_ + split_frame_padding_start_) / 8),
+            15 - split_frame_len_partial_);
+        stream.SetOffset(offset);
+        BitStream slen(xma_frame_.data() + 1, 15 + split_frame_padding_start_);
+        slen.Advance(split_frame_padding_start_);
+        split_frame_len_ = static_cast<int>(slen.Read(15));
+      }
+
+      if (frame_count > 0) {
+        // assert_true(xma::GetPacketFrameOffset(packet) - 32 ==
+        //             split_frame_len_ - split_frame_len_partial_);
+      }
+
+      auto offset = stream.Copy(
+          xma_frame_.data() + 1 +
+              ((split_frame_len_partial_ + split_frame_padding_start_) / 8),
+          split_frame_len_ - split_frame_len_partial_);
+      assert_true(offset ==
+                  (split_frame_padding_start_ + split_frame_len_partial_) % 8);
+    } else {
+      if (data->input_buffer_read_offset % kBitsPerPacket == 0) {
+        // Invalid offset. Go ahead and set it.
+        int packet_number =
+            GetFramePacketNumber(current_input_buffer, current_input_size,
+                                 data->input_buffer_read_offset);
+
+        if (packet_number == -1) {
+          return;
+        }
+
+        auto offset =
+            xma::GetPacketFrameOffset(current_input_buffer +
+                                      kBytesPerPacket * packet_number) +
+            data->input_buffer_read_offset;
+        if (offset == -1) {  // NOTE(review): tests the sum computed above.
+          // No more frames.
+          SwapInputBuffer(data);
+          // TODO partial frames? end?
+          XELOGE("XmaContext {}: TODO partial frames? end?", id());
+          assert_always("TODO");
+          return;
+        } else {
+          data->input_buffer_read_offset = offset;
+        }
+      }
+
+      if (!ValidFrameOffset(current_input_buffer, current_input_size,
+                            data->input_buffer_read_offset)) {
+        XELOGAPU("XmaContext {}: Error - Invalid read offset {}!", id(),
+                 data->input_buffer_read_offset);
+        SwapInputBuffer(data);
+        return;
+      }
+
+      // Where are we in the buffer (in XMA jargon)
+      std::tie(packet_idx, frame_idx) =
+          GetFrameNumber(current_input_buffer, current_input_size,
+                         data->input_buffer_read_offset);
+      // TODO handle
+      assert_true(packet_idx >= 0);
+      assert_true(frame_idx >= 0);
+      packet = current_input_buffer + packet_idx * kBytesPerPacket;
+      // frames that belong to this packet
+      std::tie(frame_count, frame_last_split) = GetPacketFrameCount(packet);
+      assert_true(frame_count >= 0);  // TODO end
+      // (Re)opens the codec if needed; the result is not checked here.
+      PrepareDecoder(packet, data->sample_rate, bool(data->is_stereo));
+
+      // Current frame is split to next packet:
+      bool frame_is_split = frame_last_split && (frame_idx >= frame_count - 1);
+
+      stream =
+          BitStream(current_input_buffer, (packet_idx + 1) * kBitsPerPacket);
+      stream.SetOffset(data->input_buffer_read_offset);
+      // int frame_len;
+      // int frame_len_partial
+      split_frame_len_partial_ = static_cast<int>(stream.BitsRemaining());
+      if (split_frame_len_partial_ >= 15) {
+        split_frame_len_ = static_cast<int>(stream.Peek(15));
+      } else {
+        // assert_always();
+        split_frame_len_ = xma::kMaxFrameLength + 1;
+      }
+      assert_true(frame_is_split ==
+                  (split_frame_len_ > split_frame_len_partial_));
+
+      // TODO fix bitstream copy
+      std::memset(xma_frame_.data(), 0, xma_frame_.size());
+
+      {
+        int32_t bits_to_copy =
+            std::min(split_frame_len_, split_frame_len_partial_);
+
+        if (!stream.IsOffsetValid(bits_to_copy)) {
+          XELOGAPU(
+              "XmaContext {}: Error - Invalid amount of bits to copy! "
+              "split_frame_len: {}, split_partial: {}, offset_bits: {}",
+              id(), split_frame_len_, split_frame_len_partial_,
+              stream.offset_bits());
+          SwapInputBuffer(data);
+          return;
+        }
+        auto offset = stream.Copy(xma_frame_.data() + 1, bits_to_copy);
+        assert_true(offset < 8);
+        split_frame_padding_start_ = static_cast<uint8_t>(offset);
+      }
+
+      if (frame_is_split) {
+        // go to next xma packet of this stream
+        packets_skip_ = xma::GetPacketSkipCount(packet) + 1;
+        while (packets_skip_ > 0) {
+          packets_skip_--;
+          packet += kBytesPerPacket;
+          packet_idx++;
+          if (packet_idx >= current_input_packet_count) {
+            if (!reuse_input_buffer) {
+              // Last packet. Try setup once more.
+              reuse_input_buffer = TrySetupNextLoop(data, true);
+            }
+            if (!reuse_input_buffer) {
+              if (is_streaming) {
+                SwapInputBuffer(data);
+              } else {
+                is_stream_done_ = true;
+              }
+            }
+            return;
+          }
+        }
+        // TODO guest might read this:
+        data->input_buffer_read_offset = packet_idx * kBitsPerPacket;
+        continue;
+      }
+    }
+    // Hand FFmpeg one frame; byte 0 packs the start and end padding bits.
+    av_packet_->data = xma_frame_.data();
+    av_packet_->size = static_cast<int>(
+        1 + ((split_frame_padding_start_ + split_frame_len_) / 8) +
+        (((split_frame_padding_start_ + split_frame_len_) % 8) ? 1 : 0));
+
+    auto padding_end = av_packet_->size * 8 -
+                       (8 + split_frame_padding_start_ + split_frame_len_);
+    assert_true(padding_end < 8);
+    xma_frame_[0] =
+        ((split_frame_padding_start_ & 7) << 5) | ((padding_end & 7) << 2);
+
+    split_frame_len_ = 0;
+    split_frame_len_partial_ = 0;
+    split_frame_padding_start_ = 0;
+
+    auto ret = avcodec_send_packet(av_context_, av_packet_);
+    if (ret < 0) {
+      XELOGE("XmaContext {}: Error - Sending packet for decoding failed", id());
+      // TODO bail out
+      assert_always();
+    }
+    ret = avcodec_receive_frame(av_context_, av_frame_);
+    /*
+    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
+      // TODO AVERROR_EOF???
+      break;
+    else
+    */
+    if (ret < 0) {
+      XELOGE("XmaContext {}: Error - Decoding failed", id());
+      data->parser_error_status = 4;  // TODO(Gliniak): Find all parsing errors
+                                      // and create enumerator from them
+      SwapInputBuffer(data);
+      assert_always();
+      return;  // TODO bail out
+    }
+    assert_true(ret == 0);
+
+    {
+      // copy over 1 frame
+      // update input buffer read offset
+
+      // assert(decoded_consumed_samples_ + kSamplesPerFrame <=
+      //       current_frame_.size());
+      assert_true(av_context_->sample_fmt == AV_SAMPLE_FMT_FLTP);
+      // assert_true(frame_is_split == (frame_idx == -1));
+
+      //			dump_raw(av_frame_, id());
+      ConvertFrame(reinterpret_cast<const uint8_t**>(&av_frame_->data),
+                   bool(av_frame_->channels > 1), raw_frame_.data());
+      // decoded_consumed_samples_ += kSamplesPerFrame;
+
+      auto byte_count = kBytesPerFrameChannel << data->is_stereo;
+      assert_true(output_remaining_bytes >= byte_count);
+      output_rb.Write(raw_frame_.data(), byte_count);
+      output_remaining_bytes -= byte_count;
+      data->output_buffer_write_offset = output_rb.write_offset() / 256;
+
+      total_samples += id_ == 0 ? kSamplesPerFrame : 0;
+
+      uint32_t offset =
+          std::max(kBitsPerHeader, data->input_buffer_read_offset);
+      offset = static_cast<uint32_t>(
+          GetNextFrame(current_input_buffer, current_input_size, offset));
+
+      XELOGAPU(
+          "XmaContext {}: Next Offset: {} (Frame: {}/{} Packet: {}/{} Packet "
+          "Skip: {} - {})",
+          id(), offset, frame_idx, frame_count - 1, packet_idx,
+          current_input_packet_count, xma::GetPacketSkipCount(packet),
+          data->input_buffer_read_offset);
+      if (frame_idx + 1 >= frame_count) {
+        // Skip to next packet (no split frame)
+        packets_skip_ = xma::GetPacketSkipCount(packet) + 1;
+        while (packets_skip_ > 0) {
+          packets_skip_--;
+          packet_idx++;
+          if (packet_idx >= current_input_packet_count) {
+            if (!reuse_input_buffer) {
+              // Last packet. Try setup once more.
+              reuse_input_buffer = TrySetupNextLoop(data, true);
+            }
+            if (!reuse_input_buffer) {
+              if (is_streaming) {
+                SwapInputBuffer(data);
+                data->input_buffer_read_offset =
+                    GetPacketFirstFrameOffset(data);
+              } else {
+                is_stream_done_ = true;
+              }
+              if (output_rb.write_offset() == output_rb.read_offset()) {
+                data->output_buffer_valid = 0;
+              }
+            }
+            return;
+          }
+        }
+        packet = current_input_buffer + packet_idx * kBytesPerPacket;
+        // TODO(Gliniak): There might be an edge-case when we're in packet 26/27
+        // and GetPacketFrameOffset returns that there is no data in this packet
+        // aka. FrameOffset is set to more than 0x7FFF-0x20
+        offset =
+            xma::GetPacketFrameOffset(packet) + packet_idx * kBitsPerPacket;
+      }
+      if (offset == 0 || frame_idx == -1) {
+        // Next packet but we already skipped to it
+        if (packet_idx >= current_input_packet_count) {
+          // Buffer is fully used
+          if (!reuse_input_buffer) {
+            // Last packet. Try setup once more.
+            reuse_input_buffer = TrySetupNextLoop(data, true);
+          }
+          if (!reuse_input_buffer) {
+            if (is_streaming) {
+              SwapInputBuffer(data);
+            } else {
+              is_stream_done_ = true;
+            }
+          }
+          break;
+        }
+        offset =
+            xma::GetPacketFrameOffset(packet) + packet_idx * kBitsPerPacket;
+      }
+      // TODO buffer bounds check
+      assert_true(data->input_buffer_read_offset < offset);
+      data->input_buffer_read_offset = offset;
+    }
+  }
+
+  // assert_true((split_frame_len_ != 0) == (data->input_buffer_read_offset ==
+  // 0));
+
+  // The game will kick us again with a new output buffer later.
+  // It's important that we only invalidate this if we actually wrote to it!!
+  if (output_rb.write_offset() == output_rb.read_offset()) {
+    data->output_buffer_valid = 0;
+  }
+}
+
+// Returns the frame offset read from the start of the input buffer selected
+// by data->current_buffer, or kBitsPerHeader when no pointer is available for
+// that buffer (for example because it is not marked valid).
+uint32_t XmaContextOld::GetPacketFirstFrameOffset(
+    const XMA_CONTEXT_DATA* data) {
+  // Only buffers flagged as valid are translated to host pointers.
+  uint8_t* buffer_0 = nullptr;
+  if (data->input_buffer_0_valid) {
+    buffer_0 = memory()->TranslatePhysical(data->input_buffer_0_ptr);
+  }
+  uint8_t* buffer_1 = nullptr;
+  if (data->input_buffer_1_valid) {
+    buffer_1 = memory()->TranslatePhysical(data->input_buffer_1_ptr);
+  }
+
+  uint8_t* active_buffer = data->current_buffer ? buffer_1 : buffer_0;
+  if (!active_buffer) {
+    return kBitsPerHeader;
+  }
+  return xma::GetPacketFrameOffset(active_buffer);
+}
+// Returns the offset of the frame following bit_offset in its packet, else 0.
+size_t XmaContextOld::GetNextFrame(uint8_t* block, size_t size,
+                                   size_t bit_offset) {
+  // Unfinished alternative kept for reference (TODO meh):
+  //   offset = xma::GetPacketFrameOffset(packet);
+  //   auto next_packet = bit_offset - bit_offset % kBitsPerPacket +
+  //                      kBitsPerPacket;
+  auto packet_idx = GetFramePacketNumber(block, size, bit_offset);
+
+  BitStream stream(block, size * 8);
+  stream.SetOffset(bit_offset);
+
+  if (stream.BitsRemaining() < 15) {
+    return 0;
+  }
+
+  uint64_t len = stream.Read(15);  // Frame length in bits, header included.
+  if ((len - 15) > stream.BitsRemaining()) {  // Frame exceeds the block.
+    // assert_always("TODO");
+    // *bit_offset = next_packet;
+    // return false;
+    // return next_packet;
+    return 0;
+  } else if (len >= xma::kMaxFrameLength) {
+    assert_always("TODO");
+    // *bit_offset = next_packet;
+    // return false;
+    return 0;
+    // return next_packet;
+  }
+
+  stream.Advance(len - (15 + 1));
+  // Read the trailing bit to see if frames follow
+  if (stream.Read(1) == 0) {
+    return 0;
+  }
+  // Only report the next frame if it starts in the same packet.
+  bit_offset += len;
+  if (packet_idx < GetFramePacketNumber(block, size, bit_offset)) {
+    return 0;
+  }
+  return bit_offset;
+}
+
+// Returns the index of the packet that contains bit_offset.
+//   block:      start of the input buffer (not read here).
+//   size:       size of the input buffer in bytes.
+//   bit_offset: bit offset into the buffer.
+// An offset at or beyond the end of the buffer asserts and returns -1.
+int XmaContextOld::GetFramePacketNumber(uint8_t* block, size_t size,
+                                        size_t bit_offset) {
+  const size_t total_bits = size * 8;
+  if (bit_offset >= total_bits) {
+    // Not good :(
+    assert_always();
+    return -1;
+  }
+
+  const size_t packet_number = (bit_offset >> 3) / kBytesPerPacket;
+  return static_cast<int>(static_cast<uint32_t>(packet_number));
+}
+
+// Locates bit_offset inside the input buffer.
+// Returns {packet index, frame index within that packet}. The frame index is
+// -2 when the packet index is invalid or the packet does not fit in the
+// buffer, and -1 when bit_offset is 0. Otherwise frames are counted from the
+// packet's first frame until bit_offset is reached or the walk has to stop.
+std::tuple<int, int> XmaContextOld::GetFrameNumber(uint8_t* block, size_t size,
+                                                   size_t bit_offset) {
+  auto packet_idx = GetFramePacketNumber(block, size, bit_offset);
+
+  if (packet_idx < 0 || (packet_idx + 1) * kBytesPerPacket > size) {
+    assert_always();
+    return std::make_tuple(packet_idx, -2);
+  }
+  if (bit_offset == 0) {
+    return std::make_tuple(packet_idx, -1);
+  }
+
+  uint8_t* packet_start = block + (packet_idx * kBytesPerPacket);
+  auto first_frame_bits = xma::GetPacketFrameOffset(packet_start);
+  BitStream reader(block, size * 8);
+  reader.SetOffset(packet_idx * kBitsPerPacket + first_frame_bits);
+
+  int frame_idx = 0;
+  // Stop when no frame header fits any more or the target offset is reached.
+  while (reader.BitsRemaining() >= 15 && reader.offset_bits() != bit_offset) {
+    uint64_t frame_bits = reader.Read(15);
+    if ((frame_bits - 15) > reader.BitsRemaining()) {
+      // Last frame.
+      break;
+    }
+    if (frame_bits == 0x7FFF) {
+      // Invalid frame (and last of this packet)
+      break;
+    }
+
+    reader.Advance(frame_bits - (15 + 1));
+    // The trailing bit tells whether another frame follows.
+    if (reader.Read(1) == 0) {
+      break;
+    }
+    frame_idx++;
+  }
+  return std::make_tuple(packet_idx, frame_idx);
+}
+
+// Counts the frames that start in the given packet.
+// Returns {frame count, split flag}; the flag is true when the last counted
+// frame does not fit in the bits left in the packet.
+std::tuple<int, bool> XmaContextOld::GetPacketFrameCount(uint8_t* packet) {
+  auto first_frame_bits = xma::GetPacketFrameOffset(packet);
+  if (first_frame_bits > kBitsPerPacket - kBitsPerHeader) {
+    // frame offset is beyond packet end
+    return std::make_tuple(0, false);
+  }
+
+  BitStream reader(packet, kBitsPerPacket);
+  reader.SetOffset(first_frame_bits);
+
+  int frame_count = 0;
+  // Each pass needs room for a 15-bit frame length.
+  while (reader.BitsRemaining() >= 15) {
+    frame_count++;
+    uint64_t frame_bits = reader.Read(15);
+    if ((frame_bits - 15) > reader.BitsRemaining()) {
+      return std::make_tuple(frame_count, true);
+    }
+    if (frame_bits == 0x7FFF) {
+      assert_always();
+      return std::make_tuple(frame_count, true);
+    }
+
+    reader.Advance(frame_bits - (15 + 1));
+
+    // A zero trailing bit means no further frame follows.
+    if (reader.Read(1) == 0) {
+      break;
+    }
+    // There is a case when frame ends EXACTLY at the end of packet.
+    // In such case we shouldn't increase frame count by additional not existing
+    // frame and don't mark it as splitted, but as a normal frame
+    if (!reader.BitsRemaining()) {
+      break;
+    }
+  }
+  return std::make_tuple(frame_count, false);
+}
+
+// Makes sure the FFmpeg codec is open with the requested sample rate and
+// channel count, reopening it when either differs from the current context.
+//   packet:         current XMA packet; only used by a sanity assert.
+//   sample_rate:    sample rate id as accepted by GetSampleRate().
+//   is_two_channel: true for two channels, false for one.
+// Returns 1 if the codec was (re)opened, 0 if it was already configured and
+// -1 if opening it failed.
+int XmaContextOld::PrepareDecoder(uint8_t* packet, int sample_rate,
+                                  bool is_two_channel) {
+  // Sanity check: Packet metadata is always 1 for XMA2/0 for XMA
+  assert_true((packet[2] & 0x7) == 1 || (packet[2] & 0x7) == 0);
+
+  sample_rate = GetSampleRate(sample_rate);
+
+  const int channels = is_two_channel ? 2 : 1;
+  if (av_context_->sample_rate == sample_rate &&
+      av_context_->channels == channels) {
+    // Already configured for this format.
+    return 0;
+  }
+
+  // Re-initialize the context with new sample rate and channels.
+  // We have to reopen the codec so it'll realloc whatever data it needs.
+  // TODO(DrChat): Find a better way.
+  avcodec_close(av_context_);
+
+  av_context_->sample_rate = sample_rate;
+  av_context_->channels = channels;
+
+  if (avcodec_open2(av_context_, av_codec_, nullptr) < 0) {
+    XELOGE("XmaContext: Failed to reopen FFmpeg context");
+    // Go back to the "not set" values used by Setup() so that the next call
+    // retries the open instead of treating the closed codec as configured.
+    av_context_->sample_rate = 0;
+    av_context_->channels = 0;
+    return -1;
+  }
+  return 1;
+}
+
+}  // namespace apu
+}  // namespace xe
--- a/src/xenia/apu/xma_context_old.h
+++ b/src/xenia/apu/xma_context_old.h
@ -0,0 +1,101 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2024 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_APU_XMA_CONTEXT_OLD_H_
+#define XENIA_APU_XMA_CONTEXT_OLD_H_
+
+#include <array>
+#include <atomic>
+#include <mutex>
+#include <queue>
+
+#include "xenia/apu/xma_context.h"
+#include "xenia/memory.h"
+#include "xenia/xbox.h"
+
+// Forward declarations
+struct AVCodec;
+struct AVCodecParserContext;
+struct AVCodecContext;
+struct AVFrame;
+struct AVPacket;
+
+namespace xe {
+namespace apu {
+
+// XmaContext implementation that XmaDecoder::Setup instantiates when the
+// use_new_decoder cvar is false. PrepareDecoder drives an FFmpeg codec
+// context (avcodec_close / avcodec_open2).
+//
+// NOTE(review): XmaDecoder calls Setup/Work/Enable/Block/Clear/Disable/Release
+// through XmaContext pointers, so these presumably override virtual functions
+// declared in XmaContext; none of them is marked `override` here - confirm.
+// NOTE(review): std::tuple is used below but <tuple> is not included by this
+// header directly; presumably it arrives transitively - confirm.
+class XmaContextOld : public XmaContext {
+ public:
+  explicit XmaContextOld();
+  ~XmaContextOld();
+
+  // XmaDecoder::Setup treats a non-zero return value as a failure.
+  int Setup(uint32_t id, Memory* memory, uint32_t guest_ptr);
+  // The decoder worker loop ORs the result into its did_work flag.
+  bool Work();
+
+  void Enable();
+  bool Block(bool poll);
+  void Clear();
+  void Disable();
+  void Release();
+
+ private:
+  static void SwapInputBuffer(XMA_CONTEXT_DATA* data);
+  static bool TrySetupNextLoop(XMA_CONTEXT_DATA* data,
+                               bool ignore_input_buffer_offset);
+  static void NextPacket(XMA_CONTEXT_DATA* data);
+  // PrepareDecoder passes its sample_rate argument through this before
+  // storing the result in the codec context.
+  static int GetSampleRate(int id);
+  // Get the offset of the next frame. Does not traverse packets.
+  static size_t GetNextFrame(uint8_t* block, size_t size, size_t bit_offset);
+  // Get the containing packet number of the frame pointed to by the offset.
+  static int GetFramePacketNumber(uint8_t* block, size_t size,
+                                  size_t bit_offset);
+  // Get the packet number and the index of the frame inside that packet
+  static std::tuple<int, int> GetFrameNumber(uint8_t* block, size_t size,
+                                             size_t bit_offset);
+  // Get the number of frames contained in the packet (including truncated) and
+  // if the last frame is split.
+  static std::tuple<int, bool> GetPacketFrameCount(uint8_t* packet);
+
+  bool ValidFrameOffset(uint8_t* block, size_t size_bytes,
+                        size_t frame_offset_bits);
+  void Decode(XMA_CONTEXT_DATA* data);
+  // Returns 0 if the codec context already matches the requested sample rate
+  // and channel count, 1 if the codec was reopened, -1 if reopening failed.
+  int PrepareDecoder(uint8_t* packet, int sample_rate, bool is_two_channel);
+
+  // This method should be used ONLY when we're at the last packet of the stream
+  // and we want to find the offset in the next buffer.
+  uint32_t GetPacketFirstFrameOffset(const XMA_CONTEXT_DATA* data);
+
+  // uint32_t decoded_consumed_samples_ = 0; // TODO do this dynamically
+  // int decoded_idx_ = -1;
+
+  // bool partial_frame_saved_ = false;
+  // bool partial_frame_size_known_ = false;
+  // size_t partial_frame_total_size_bits_ = 0;
+  // size_t partial_frame_start_offset_bits_ = 0;
+  // size_t partial_frame_offset_bits_ = 0;  // blah internal don't use this
+  // std::vector<uint8_t> partial_frame_buffer_;
+  uint32_t packets_skip_ = 0;
+
+  bool is_stream_done_ = false;
+  // bool split_frame_pending_ = false;
+  // Bookkeeping for a frame that is split across packets.
+  // NOTE(review): units (bits vs. bytes) are not visible from this header -
+  // confirm against the implementation.
+  uint32_t split_frame_len_ = 0;
+  uint32_t split_frame_len_partial_ = 0;
+  uint8_t split_frame_padding_start_ = 0;
+  // first byte contains bit offset information
+  // (no initializer is given for this buffer here)
+  std::array<uint8_t, 1 + 4096> xma_frame_;
+
+  // uint8_t* current_frame_ = nullptr;
+  // conversion buffer for 2 channel frame
+  std::array<uint8_t, kBytesPerFrameChannel * 2> raw_frame_;
+  // std::vector<uint8_t> current_frame_ = std::vector<uint8_t>(0);
+};
+
+}  // namespace apu
+}  // namespace xe
+
+#endif  // XENIA_APU_XMA_CONTEXT_OLD_H_
--- a/src/xenia/apu/xma_decoder.cc
+++ b/src/xenia/apu/xma_decoder.cc
@ -10,6 +10,9 @@
 #include "xenia/apu/xma_decoder.h"

 #include "xenia/apu/xma_context.h"
+#include "xenia/apu/xma_context_new.h"
+#include "xenia/apu/xma_context_old.h"
+
 #include "xenia/base/cvar.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/math.h"
@ -51,6 +54,14 @@ extern "C" {
 DEFINE_bool(ffmpeg_verbose, false, "Verbose FFmpeg output (debug and above)",
            "APU");

+// Selects which XmaContext implementation XmaDecoder::Setup creates for every
+// context slot: XmaContextNew when true, XmaContextOld when false.
+DEFINE_bool(use_new_decoder, false,
+            "Enables usage of new experimental XMA audio decoder.", "APU");
+
+// When true, the XMA worker thread runs WorkerThreadMain(). When false, the
+// worker thread's entry function returns without running it, and
+// XmaDecoder::WriteRegister calls Work() on each context right after
+// enabling it.
+DEFINE_bool(use_dedicated_xma_thread, true,
+            "Enables XMA decoding on separate thread. Disabled should produce "
+            "better results, but decrease performance a bit.",
+            "APU");
+
 namespace xe {
 namespace apu {

@ -128,9 +139,14 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {

  // Setup XMA contexts.
  for (int i = 0; i < kContextCount; ++i) {
+    if (cvars::use_new_decoder) {
+      contexts_[i] = new XmaContextNew();
+    } else {
+      contexts_[i] = new XmaContextOld();
+    }
+
    uint32_t guest_ptr = context_data_first_ptr_ + i * sizeof(XMA_CONTEXT_DATA);
-    XmaContext& context = contexts_[i];
-    if (context.Setup(i, memory(), guest_ptr)) {
+    if (contexts_[i]->Setup(i, memory(), guest_ptr)) {
      assert_always();
    }
  }
@ -144,7 +160,9 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
      kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
          kernel_state, 128 * 1024, 0,
          [this]() {
-            WorkerThreadMain();
+            if (cvars::use_dedicated_xma_thread) {
+              WorkerThreadMain();
+            }
            return 0;
          },
          kernel_state
@ -163,8 +181,7 @@ void XmaDecoder::WorkerThreadMain() {
    // Okay, let's loop through XMA contexts to find ones we need to decode!
    bool did_work = false;
    for (uint32_t n = 0; n < kContextCount; n++) {
-      XmaContext& context = contexts_[n];
-      did_work = context.Work() || did_work;
+      did_work = contexts_[n]->Work() || did_work;

      // TODO: Need thread safety to do this.
      // Probably not too important though.
@ -228,7 +245,7 @@ uint32_t XmaDecoder::AllocateContext() {
    return 0;
  }

-  XmaContext& context = contexts_[index];
+  XmaContext& context = *contexts_[index];
  assert_false(context.is_allocated());
  context.set_is_allocated(true);
  return context.guest_ptr();
@ -238,7 +255,7 @@ void XmaDecoder::ReleaseContext(uint32_t guest_ptr) {
  auto context_id = GetContextId(guest_ptr);
  assert_true(context_id >= 0);

-  XmaContext& context = contexts_[context_id];
+  XmaContext& context = *contexts_[context_id];
  assert_true(context.is_allocated());
  context.Release();
  context_bitmap_.Release(context_id);
@ -248,7 +265,7 @@ bool XmaDecoder::BlockOnContext(uint32_t guest_ptr, bool poll) {
  auto context_id = GetContextId(guest_ptr);
  assert_true(context_id >= 0);

-  XmaContext& context = contexts_[context_id];
+  XmaContext& context = *contexts_[context_id];
  return context.Block(poll);
 }

@ -309,8 +326,11 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) {
    for (int i = 0; value && i < 32; ++i, value >>= 1) {
      if (value & 1) {
        uint32_t context_id = base_context_id + i;
-        auto& context = contexts_[context_id];
+        auto& context = *contexts_[context_id];
        context.Enable();
+        if (!cvars::use_dedicated_xma_thread) {
+          context.Work();
+        }
      }
    }
    // Signal the decoder thread to start processing.
@ -323,7 +343,7 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) {
    for (int i = 0; value && i < 32; ++i, value >>= 1) {
      if (value & 1) {
        uint32_t context_id = base_context_id + i;
-        auto& context = contexts_[context_id];
+        auto& context = *contexts_[context_id];
        context.Disable();
      }
    }
@ -337,7 +357,7 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) {
    for (int i = 0; value && i < 32; ++i, value >>= 1) {
      if (value & 1) {
        uint32_t context_id = base_context_id + i;
-        XmaContext& context = contexts_[context_id];
+        XmaContext& context = *contexts_[context_id];
        context.Clear();
      }
    }
--- a/src/xenia/apu/xma_decoder.h
+++ b/src/xenia/apu/xma_decoder.h
@ -81,7 +81,7 @@ class XmaDecoder {
  XmaRegisterFile register_file_;

  static const uint32_t kContextCount = 320;
-  XmaContext contexts_[kContextCount];
+  XmaContext* contexts_[kContextCount];
  BitMap context_bitmap_;

  uint32_t context_data_first_ptr_ = 0;
--- a/src/xenia/apu/xma_helpers.h
+++ b/src/xenia/apu/xma_helpers.h
@ -2,7 +2,7 @@
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
- * Copyright 2021 Ben Vanik. All rights reserved.                             *
+ * Copyright 2023 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
@ -20,31 +20,33 @@ namespace xma {

 static const uint32_t kMaxFrameLength = 0x7FFF;

-// Get number of frames that /begin/ in this packet.
-uint32_t GetPacketFrameCount(uint8_t* packet) {
-  return (uint8_t)(packet[0] >> 2);
+// Get number of frames that /begin/ in this packet. This is valid only for XMA2
+// packets. The count is stored in the upper six bits of packet[0].
+// The return type is a plain value: a top-level const on a by-value return is
+// ignored by the language and only triggers -Wignored-qualifiers.
+static uint8_t GetPacketFrameCount(const uint8_t* packet) {
+  return packet[0] >> 2;
+}
+
+// Get the packet metadata field: the low three bits of packet[2].
+// Returned by plain value; a const-qualified by-value return is meaningless.
+static uint8_t GetPacketMetadata(const uint8_t* packet) {
+  return packet[2] & 0x7;
+}
+
+// Returns true when the packet metadata field equals 1, which this helper
+// treats as the marker of an XMA2 packet.
+// Returned by plain value; a const-qualified by-value return is meaningless.
+static bool IsPacketXma2Type(const uint8_t* packet) {
+  return GetPacketMetadata(packet) == 1;
+}
+
+// Get the packet skip count, stored in packet[3].
+// Returned by plain value; a const-qualified by-value return is meaningless.
+static uint8_t GetPacketSkipCount(const uint8_t* packet) {
+  return packet[3];
 }

 // Get the first frame offset in bits
+// The 15-bit field is assembled from the low two bits of packet[0], all of
+// packet[1] and the upper five bits of packet[2]; 32 is then added to it
+// (presumably the size of the packet header in bits - confirm).
-uint32_t GetPacketFrameOffset(uint8_t* packet) {
+static uint32_t GetPacketFrameOffset(const uint8_t* packet) {
   uint32_t val = (uint16_t)(((packet[0] & 0x3) << 13) | (packet[1] << 5) |
                             (packet[2] >> 3));
-  // if (val > kBitsPerPacket - kBitsPerHeader) {
-  //   // There is no data in this packet
-  //   return -1;
-  // } else {
   return val + 32;
-  // }
 }

-uint32_t GetPacketMetadata(uint8_t* packet) {
-  return (uint8_t)(packet[2] & 0x7);
-}
-
-uint32_t GetPacketSkipCount(uint8_t* packet) { return (uint8_t)(packet[3]); }
-
 }  // namespace xma
 }  // namespace apu
 }  // namespace xe

-#endif  // XENIA_APU_XMA_HELPERS_H_
+#endif  // XENIA_APU_XMA_HELPERS_H_