From d83d8eee010589757f398958055cd4e4c201a5d2 Mon Sep 17 00:00:00 2001
From: Gliniak <Gliniak93@gmail.com>
Date: Tue, 23 Jan 2024 19:42:44 +0100
Subject: [PATCH] [APU] Initial version of new XMA decoder

---
 src/xenia/apu/xma_context.cc | 1146 +++++++++++++---------------------
 src/xenia/apu/xma_context.h  |  136 ++--
 src/xenia/apu/xma_helpers.h  |   32 +-
 3 files changed, 560 insertions(+), 754 deletions(-)
diff --git a/src/xenia/apu/xma_context.cc b/src/xenia/apu/xma_context.cc
index 7f9c89095..b33a89c0c 100644
--- a/src/xenia/apu/xma_context.cc
+++ b/src/xenia/apu/xma_context.cc
@@ -2,7 +2,7 @@
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
-* Copyright 2021 Ben Vanik. All rights reserved.                             *
+* Copyright 2023 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
@@ -10,15 +10,13 @@
 #include "xenia/apu/xma_context.h"
 
 #include <algorithm>
-#include <cstring>
 
 #include "xenia/apu/xma_decoder.h"
 #include "xenia/apu/xma_helpers.h"
-#include "xenia/base/bit_stream.h"
+
 #include "xenia/base/logging.h"
 #include "xenia/base/platform.h"
 #include "xenia/base/profiling.h"
-#include "xenia/base/ring_buffer.h"
 
 extern "C" {
 #if XE_COMPILER_MSVC
@@ -37,6 +35,20 @@ extern "C" {
 namespace xe {
 namespace apu {
 
+static void dump_raw(AVFrame* frame, int id) {
+  FILE* outfile = fopen(fmt::format("out{}.raw", id).c_str(), "ab");
+  if (!outfile) {
+    return;
+  }
+  size_t data_size = sizeof(float);
+  for (int i = 0; i < frame->nb_samples; i++) {
+    for (int ch = 0; ch < frame->channels; ch++) {
+      fwrite(frame->data[ch] + data_size * i, 1, data_size, outfile);
+    }
+  }
+  fclose(outfile);
+}
+
 XmaContext::XmaContext() = default;
 
 XmaContext::~XmaContext() {
@@ -49,9 +61,6 @@ XmaContext::~XmaContext() {
   if (av_frame_) {
     av_frame_free(&av_frame_);
   }
-  // if (current_frame_) {
-  //   delete[] current_frame_;
-  //  }
 }
 
 int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
@@ -62,9 +71,8 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
   // Allocate ffmpeg stuff:
   av_packet_ = av_packet_alloc();
   assert_not_null(av_packet_);
-  // chrispy: preallocate this buffer so that ffmpeg isn't reallocating it for
-  // every packet, these allocations were causing RtlSubsegmentInitialize
   av_packet_->buf = av_buffer_alloc(128 * 1024);
+
   // find the XMA2 audio decoder
   av_codec_ = avcodec_find_decoder(AV_CODEC_ID_XMAFRAMES);
   if (!av_codec_) {
@@ -92,20 +100,103 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
   return 0;
 }
 
+RingBuffer XmaContext::PrepareOutputRingBuffer(XMA_CONTEXT_DATA* data) {
+  const uint32_t output_capacity =
+      data->output_buffer_block_count * kOutputBytesPerBlock;
+  const uint32_t output_read_offset =
+      data->output_buffer_read_offset * kOutputBytesPerBlock;
+  const uint32_t output_write_offset =
+      data->output_buffer_write_offset * kOutputBytesPerBlock;
+
+  if (output_capacity > kOutputMaxSizeBytes) {
+    XELOGW(
+        "XmaContext {}: Output buffer uses more space than expected! "
+        "(Actual: {} Max: {})",
+        id(), output_capacity, kOutputMaxSizeBytes);
+  }
+
+  uint8_t* output_buffer = memory()->TranslatePhysical(data->output_buffer_ptr);
+
+  // Output buffers are in raw PCM samples, 256 bytes per block.
+  // Output buffer is a ring buffer. We need to write from the write offset
+  // to the read offset.
+  RingBuffer output_rb(output_buffer, output_capacity);
+  output_rb.set_read_offset(output_read_offset);
+  output_rb.set_write_offset(output_write_offset);
+  remaining_subframe_blocks_in_output_buffer_ =
+      (int32_t)output_rb.write_count() / kOutputBytesPerBlock;
+
+  return output_rb;
+}
+
 bool XmaContext::Work() {
   if (!is_enabled() || !is_allocated()) {
     return false;
   }
-  {
-    std::lock_guard<xe_mutex> lock(lock_);
-    set_is_enabled(false);
 
-    auto context_ptr = memory()->TranslateVirtual(guest_ptr());
-    XMA_CONTEXT_DATA data(context_ptr);
-    Decode(&data);
+  std::lock_guard<xe_mutex> lock(lock_);
+  set_is_enabled(false);
+
+  auto context_ptr = memory()->TranslateVirtual(guest_ptr());
+  XMA_CONTEXT_DATA data(context_ptr);
+
+  if (!data.output_buffer_valid) {
+    return true;
+  }
+
+  RingBuffer output_rb = PrepareOutputRingBuffer(&data);
+
+  const int32_t minimum_subframe_decode_count =
+      (data.subframe_decode_count * 2) - 1;
+
+  // We don't have enough space to even make one pass
+  // Waiting for decoder to return more space.
+  if (minimum_subframe_decode_count >
+      remaining_subframe_blocks_in_output_buffer_) {
+    XELOGD("XmaContext {}: No space for subframe decoding {}/{}!", id(),
+           minimum_subframe_decode_count,
+           remaining_subframe_blocks_in_output_buffer_);
     data.Store(context_ptr);
     return true;
   }
+
+  while (remaining_subframe_blocks_in_output_buffer_ >=
+         minimum_subframe_decode_count) {
+    XELOGAPU(
+        "XmaContext {}: Write Count: {}, Capacity: {} - {} {} Subframes: {} "
+        "Skip: {}",
+        id(), (uint32_t)output_rb.write_count(),
+        remaining_subframe_blocks_in_output_buffer_,
+        data.input_buffer_0_valid + (data.input_buffer_1_valid << 1),
+        data.output_buffer_valid, data.subframe_decode_count,
+        data.subframe_skip_count);
+
+    Decode(&data);
+    Consume(&output_rb, &data);
+
+    if (!data.IsAnyInputBufferValid() || data.error_status == 4) {
+      break;
+    }
+  }
+
+  data.output_buffer_write_offset =
+      output_rb.write_offset() / kOutputBytesPerBlock;
+
+  XELOGAPU("XmaContext {}: Read Output: {} Write Output: {}", id(),
+           data.output_buffer_read_offset, data.output_buffer_write_offset);
+
+  // That's a bit misleading due to nature of ringbuffer
+  // when write and read offset matches it might mean that we wrote nothing
+  // or we fully saturated allocated space.
+  if (output_rb.empty()) {
+    data.output_buffer_valid = 0;
+  }
+
+  // TODO: Rewrite!
+  // There is a case when game can modify certain parts of context mid-play
+  // and decoder should be aware of it
+  data.Store(context_ptr);
+  return true;
 }
 
 void XmaContext::Enable() {
@@ -116,12 +207,9 @@ void XmaContext::Enable() {
 
   XELOGAPU("XmaContext: kicking context {} (buffer {} {}/{} bits)", id(),
            data.current_buffer, data.input_buffer_read_offset,
-           (data.current_buffer == 0 ? data.input_buffer_0_packet_count
-                                     : data.input_buffer_1_packet_count) *
-               kBitsPerPacket);
+           data.GetCurrentInputBufferPacketCount() * kBitsPerPacket);
 
   data.Store(context_ptr);
-
   set_is_enabled(true);
 }
 
@@ -150,12 +238,9 @@ void XmaContext::Clear() {
   data.input_buffer_read_offset = 0;
   data.output_buffer_read_offset = 0;
   data.output_buffer_write_offset = 0;
+  data.input_buffer_read_offset = kBitsPerPacketHeader;
 
-  xma_frame_.fill(0);
-  split_frame_len_ = 0;
-  split_frame_len_partial_ = 0;
-  split_frame_padding_start_ = 0;
-
+  current_frame_remaining_subframes_ = 0;
   data.Store(context_ptr);
 }
 
@@ -175,6 +260,10 @@ void XmaContext::Release() {
   std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA));  // Zero it.
 }
 
+int XmaContext::GetSampleRate(int id) {
+  return kIdToSampleRate[std::min(id, 3)];
+}
+
 void XmaContext::SwapInputBuffer(XMA_CONTEXT_DATA* data) {
   // No more frames.
   if (data->current_buffer == 0) {
@@ -183,178 +272,62 @@ void XmaContext::SwapInputBuffer(XMA_CONTEXT_DATA* data) {
     data->input_buffer_1_valid = 0;
   }
   data->current_buffer ^= 1;
-  data->input_buffer_read_offset = kBitsPerHeader;
+  data->input_buffer_read_offset = kBitsPerPacketHeader;
 }
 
-bool XmaContext::TrySetupNextLoop(XMA_CONTEXT_DATA* data,
-                                  bool ignore_input_buffer_offset) {
-  // Setup the input buffer offset if next loop exists.
-  // TODO(Pseudo-Kernel): Need to handle loop in the following cases.
-  // 1. loop_start == loop_end == 0
-  // 2. loop_start > loop_end && loop_count > 0
-  if (data->loop_count > 0 && data->loop_start < data->loop_end &&
-      (ignore_input_buffer_offset ||
-       data->input_buffer_read_offset >= data->loop_end)) {
-    // Loop back to the beginning.
-    data->input_buffer_read_offset = data->loop_start;
-    if (data->loop_count < 255) {
-      data->loop_count--;
-    }
-    return true;
-  }
-  return false;
-}
-
-/*
-void XmaContext::NextPacket(
-    uint8_t* input_buffer,
-    uint32_t input_size,
-    uint32_t input_buffer_read_offset) {
-*/
-void XmaContext::NextPacket(XMA_CONTEXT_DATA* data) {
-  // auto packet_idx = GetFramePacketNumber(input_buffer, input_size,
-  // input_buffer_read_offset);
-
-  // packet_idx++;
-  // if (packet_idx++ >= input_size)
-}
-
-int XmaContext::GetSampleRate(int id) {
-  switch (id) {
-    case 0:
-      return 24000;
-    case 1:
-      return 32000;
-    case 2:
-      return 44100;
-    case 3:
-      return 48000;
-  }
-  assert_always();
-  return 0;
-}
-
-bool XmaContext::ValidFrameOffset(uint8_t* block, size_t size_bytes,
-                                  size_t frame_offset_bits) {
-  uint32_t packet_num =
-      GetFramePacketNumber(block, size_bytes, frame_offset_bits);
-  if (packet_num == -1) {
-    // Invalid packet number
-    XELOGAPU("ValidFrameOffset: Invalid packet number");
-    return false;
-  }
-
-  uint8_t* packet = block + (packet_num * kBytesPerPacket);
-  size_t relative_offset_bits = frame_offset_bits % kBitsPerPacket;
-
-  uint32_t first_frame_offset = xma::GetPacketFrameOffset(packet);
-  if (first_frame_offset == -1 || first_frame_offset > kBitsPerPacket) {
-    XELOGAPU("ValidFrameOffset: Invalid frame offset {}", first_frame_offset);
-    // Packet only contains a partial frame, so no frames can start here.
-    return false;
-  }
-
-  BitStream stream(packet, kBitsPerPacket);
-  stream.SetOffset(first_frame_offset);
-  while (true) {
-    if (stream.offset_bits() == relative_offset_bits) {
-      return true;
-    }
-
-    if (stream.BitsRemaining() < 15) {
-      XELOGAPU("ValidFrameOffset: No room for next frame header {}",
-               first_frame_offset);
-      // Not enough room for another frame header.
-      return false;
-    }
-
-    uint64_t size = stream.Read(15);
-    if ((size - 15) > stream.BitsRemaining()) {
-      XELOGAPU("ValidFrameOffset: Last frame {} - {}", first_frame_offset,
-               size);
-      // Last frame.
-      return false;
-    } else if (size == 0x7FFF) {
-      // Invalid frame (and last of this packet)
-      return false;
-    }
-
-    stream.Advance(size - 16);
-
-    // Read the trailing bit to see if frames follow
-    if (stream.Read(1) == 0) {
-      break;
-    }
-  }
-
-  return false;
-}
-
-static void dump_raw(AVFrame* frame, int id) {
-  FILE* outfile = fopen(fmt::format("out{}.raw", id).c_str(), "ab");
-  if (!outfile) {
+void XmaContext::Consume(RingBuffer* output_rb, XMA_CONTEXT_DATA* data) {
+  if (!current_frame_remaining_subframes_) {
     return;
   }
-  size_t data_size = sizeof(float);
-  for (int i = 0; i < frame->nb_samples; i++) {
-    for (int ch = 0; ch < frame->channels; ch++) {
-      fwrite(frame->data[ch] + data_size * i, 1, data_size, outfile);
-    }
-  }
-  fclose(outfile);
+
+  const int8_t subframes_to_write =
+      std::min((int8_t)current_frame_remaining_subframes_,
+               (int8_t)data->subframe_decode_count);
+
+  const int8_t raw_frame_read_offset =
+      ((kBytesPerFrameChannel / kOutputBytesPerBlock) << data->is_stereo) -
+      current_frame_remaining_subframes_;
+  // + data->subframe_skip_count;
+
+  output_rb->Write(
+      raw_frame_.data() + (kOutputBytesPerBlock * raw_frame_read_offset),
+      subframes_to_write * kOutputBytesPerBlock);
+  remaining_subframe_blocks_in_output_buffer_ -= subframes_to_write;
+  current_frame_remaining_subframes_ -= subframes_to_write;
+
+  XELOGAPU("XmaContext {}: Consume: {} - {} - {} - {} - {}", id(),
+           remaining_subframe_blocks_in_output_buffer_,
+           data->output_buffer_write_offset, data->output_buffer_read_offset,
+           output_rb->write_offset(), current_frame_remaining_subframes_);
 }
 
 void XmaContext::Decode(XMA_CONTEXT_DATA* data) {
   SCOPE_profile_cpu_f("apu");
 
-  // What I see:
-  // XMA outputs 2 bytes per sample
-  // 512 samples per frame (128 per subframe)
-  // Max output size is data.output_buffer_block_count * 256
-
-  // This decoder is fed packets (max 4095 per buffer)
-  // Packets contain "some" frames
-  // 32bit header (big endian)
-
-  // Frames are the smallest thing the SPUs can decode.
-  // They can and usually will span packets.
-
-  // Sample rates (data.sample_rate):
-  // 0 - 24 kHz
-  // 1 - 32 kHz
-  // 2 - 44.1 kHz
-  // 3 - 48 kHz
-
-  // SPUs also support stereo decoding. (data.is_stereo)
-
-  // Check the output buffer - we cannot decode anything else if it's
-  // unavailable.
-  if (!data->output_buffer_valid) {
-    return;
-  }
-
   // No available data.
-  if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
+  if (!data->IsAnyInputBufferValid()) {
+    // data->error_status = 4;
     return;
   }
 
-  // XAudio Loops
-  // loop_count:
-  //  - XAUDIO2_MAX_LOOP_COUNT = 254
-  //  - XAUDIO2_LOOP_INFINITE = 255
-  // loop_start/loop_end are bit offsets to a specific frame
+  if (current_frame_remaining_subframes_ > 0) {
+    return;
+  }
 
-  // Translate pointers for future use.
-  // Sometimes the game will use rolling input buffers. If they do, we cannot
-  // assume they form a complete block! In addition, the buffers DO NOT have
-  // to be contiguous!
-  uint8_t* in0 = data->input_buffer_0_valid
-                     ? memory()->TranslatePhysical(data->input_buffer_0_ptr)
-                     : nullptr;
-  uint8_t* in1 = data->input_buffer_1_valid
-                     ? memory()->TranslatePhysical(data->input_buffer_1_ptr)
-                     : nullptr;
-  uint8_t* current_input_buffer = data->current_buffer ? in1 : in0;
+  uint8_t* current_input_buffer = GetCurrentInputBuffer(data);
+
+  if (!current_input_buffer) {
+    XELOGE(
+        "XmaContext {}: Invalid current buffer! Selected Buffer: {} Valid: {} "
+        "Pointer: {:08X}",
+        id(), data->current_buffer, data->IsCurrentInputBufferValid(),
+        data->GetCurrentInputBufferAddress());
+    return;
+  }
+
+  input_buffer_.fill(0);
+
+  UpdateLoopStatus(data);
 
   if (!current_input_buffer) {
     XELOGE("XmaContext {}: Error - input buffer pointer is invalid!", id());
@@ -374,467 +347,309 @@ void XmaContext::Decode(XMA_CONTEXT_DATA* data) {
       current_input_buffer, data->output_buffer_ptr,
       data->output_buffer_block_count);
 
-  if (is_stream_done_) {
-    is_stream_done_ = false;
-    packets_skip_ = 0;
+  const uint32_t current_input_size = GetCurrentInputBufferSize(data);
+  const uint32_t current_input_packet_count =
+      current_input_size / kBytesPerPacket;
+
+  const int16_t packet_index =
+      GetPacketNumber(current_input_size, data->input_buffer_read_offset);
+
+  if (packet_index == -1) {
+    XELOGE("XmaContext {}: Invalid packet index. Input read offset: {}", id(),
+           data->input_buffer_read_offset);
+    return;
+  }
+
+  uint8_t* packet = current_input_buffer + (packet_index * kBytesPerPacket);
+  // Because game can reset read offset. We must assure that new offset is
+  // valid. Splitted frames aren't handled here, so it's not a big deal.
+  const uint32_t frame_offset = xma::GetPacketFrameOffset(packet);
+  if (data->input_buffer_read_offset < frame_offset) {
+    data->input_buffer_read_offset = frame_offset;
+  }
+
+  const uint32_t relative_offset =
+      data->input_buffer_read_offset % kBitsPerPacket;
+  const kPacketInfo packet_info = GetPacketInfo(packet, relative_offset);
+  const uint32_t packet_to_skip = xma::GetPacketSkipCount(packet) + 1;
+  const uint32_t next_packet_index = packet_index + packet_to_skip;
+
+  BitStream stream =
+      BitStream(current_input_buffer, (packet_index + 1) * kBitsPerPacket);
+  stream.SetOffset(data->input_buffer_read_offset);
+
+  const uint64_t bits_to_copy = GetAmountOfBitsToRead(
+      (uint32_t)stream.BitsRemaining(), packet_info.current_frame_size_);
+
+  if (bits_to_copy == 0) {
+    XELOGE("XmaContext {}: There is no bits to copy!", id());
     SwapInputBuffer(data);
     return;
   }
 
-  size_t input_buffer_0_size =
-      data->input_buffer_0_packet_count * kBytesPerPacket;
-  size_t input_buffer_1_size =
-      data->input_buffer_1_packet_count * kBytesPerPacket;
+  if (packet_info.isLastFrameInPacket()) {
+    // Frame is a splitted frame
+    if (stream.BitsRemaining() < packet_info.current_frame_size_) {
+      const uint8_t* next_packet =
+          GetNextPacket(data, next_packet_index, current_input_packet_count);
 
-  size_t current_input_size =
-      data->current_buffer ? input_buffer_1_size : input_buffer_0_size;
-  size_t current_input_packet_count = current_input_size / kBytesPerPacket;
-  bool is_streaming = data->input_buffer_0_packet_count == 1 &&
-                      data->input_buffer_1_packet_count == 1;
-
-  // Output buffers are in raw PCM samples, 256 bytes per block.
-  // Output buffer is a ring buffer. We need to write from the write offset
-  // to the read offset.
-  uint8_t* output_buffer = memory()->TranslatePhysical(data->output_buffer_ptr);
-  uint32_t output_capacity =
-      data->output_buffer_block_count * kBytesPerSubframeChannel;
-  uint32_t output_read_offset =
-      data->output_buffer_read_offset * kBytesPerSubframeChannel;
-  uint32_t output_write_offset =
-      data->output_buffer_write_offset * kBytesPerSubframeChannel;
-
-  RingBuffer output_rb(output_buffer, output_capacity);
-  output_rb.set_read_offset(output_read_offset);
-  output_rb.set_write_offset(output_write_offset);
-
-  // We can only decode an entire frame and write it out at a time, so
-  // don't save any samples.
-  // TODO(JoelLinn): subframes when looping
-  size_t output_remaining_bytes = output_rb.write_count();
-  output_remaining_bytes -=
-      output_remaining_bytes % (kBytesPerFrameChannel << data->is_stereo);
-
-  // is_dirty_ = true; // TODO
-  // is_dirty_ = false;  // TODO
-  assert_false(data->stop_when_done);
-  assert_false(data->interrupt_when_done);
-  static int total_samples = 0;
-  // Decode until we can't write any more data.
-  while (output_remaining_bytes > 0) {
-    if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
-      // Out of data.
-      break;
+      if (!next_packet) {
+        // Error path
+        // Decoder probably should return error here
+        // Not sure what error code should be returned
+        data->error_status = 4;
+        return;
+      }
+      // Copy next packet to buffer
+      std::memcpy(input_buffer_.data() + kBytesPerPacketData,
+                  next_packet + kBytesPerPacketHeader, kBytesPerPacketData);
     }
-    // Setup the input buffer if we are at loop_end.
-    // The input buffer must not be swapped out until all loops are processed.
-    bool reuse_input_buffer = TrySetupNextLoop(data, false);
+  }
 
-    // assert_true(packets_skip_ == 0);
-    // assert_true(split_frame_len_ == 0);
-    // assert_true(split_frame_len_partial_ == 0);
+  // Copy current packet to buffer
+  std::memcpy(input_buffer_.data(), packet + kBytesPerPacketHeader,
+              kBytesPerPacketData);
 
-    // Where are we in the buffer (in XMA jargon)
-    int packet_idx, frame_idx, frame_count;
-    uint8_t* packet;
-    bool frame_last_split;
+  stream = BitStream(input_buffer_.data(),
+                     (kBitsPerPacket - kBitsPerPacketHeader) * 2);
+  stream.SetOffset(relative_offset - kBitsPerPacketHeader);
 
-    BitStream stream(current_input_buffer, current_input_size * 8);
-    stream.SetOffset(data->input_buffer_read_offset);
+  xma_frame_.fill(0);
 
-    if (data->input_buffer_read_offset > current_input_size * 8) {
-      XELOGE(
-          "XmaContext {}: Error - Provided input offset exceed input buffer "
-          "size! ({} > {})",
-          id(), data->input_buffer_read_offset, current_input_size * 8);
-      SwapInputBuffer(data);
-      return;
-    }
-    // if we had a buffer swap try to skip packets first
-    if (packets_skip_ > 0) {
-      packet_idx =
-          GetFramePacketNumber(current_input_buffer, current_input_size,
-                               data->input_buffer_read_offset);
-      while (packets_skip_ > 0) {
-        packets_skip_--;
-        packet_idx++;
-        if (packet_idx > current_input_packet_count) {
-          if (!reuse_input_buffer) {
-            // Last packet. Try setup once more.
-            reuse_input_buffer = TrySetupNextLoop(data, true);
-          }
-          if (!reuse_input_buffer) {
-            if (is_streaming) {
-              SwapInputBuffer(data);
-            } else {
-              is_stream_done_ = true;
-            }
-          }
-          return;
-        }
-      }
-      // invalid frame pointer but needed for us
-      data->input_buffer_read_offset = packet_idx * kBitsPerPacket;
-      // continue;
-    }
+  XELOGAPU(
+      "XmaContext {}: Reading Frame {}/{} (size: {}) From Packet "
+      "{}/{}",
+      id(), (int32_t)packet_info.current_frame_, packet_info.frame_count_,
+      packet_info.current_frame_size_, packet_index,
+      current_input_packet_count);
 
-    if (split_frame_len_) {
-      // handle a frame that was split over two packages
-      packet_idx =
-          GetFramePacketNumber(current_input_buffer, current_input_size,
-                               data->input_buffer_read_offset);
-      packet = current_input_buffer + packet_idx * kBytesPerPacket;
-      std::tie(frame_count, frame_last_split) = GetPacketFrameCount(packet);
-      frame_idx = -1;
+  const uint32_t padding_start = static_cast<uint8_t>(
+      stream.Copy(xma_frame_.data() + 1, packet_info.current_frame_size_));
 
-      stream =
-          BitStream(current_input_buffer, (packet_idx + 1) * kBitsPerPacket);
-      stream.SetOffset(packet_idx * kBitsPerPacket + kBitsPerHeader);
+  raw_frame_.fill(0);
 
-      if (split_frame_len_ > xma::kMaxFrameLength) {
-        // TODO write CopyPeekMethod
-        auto offset = stream.offset_bits();
-        stream.Copy(
-            xma_frame_.data() + 1 +
-                ((split_frame_len_partial_ + split_frame_padding_start_) / 8),
-            15 - split_frame_len_partial_);
-        stream.SetOffset(offset);
-        BitStream slen(xma_frame_.data() + 1, 15 + split_frame_padding_start_);
-        slen.Advance(split_frame_padding_start_);
-        split_frame_len_ = static_cast<int>(slen.Read(15));
-      }
+  PrepareDecoder(data->sample_rate, bool(data->is_stereo));
+  PreparePacket(packet_info.current_frame_size_, padding_start);
+  if (DecodePacket(av_context_, av_packet_, av_frame_)) {
+    // dump_raw(av_frame_, id());
+    ConvertFrame((const uint8_t**)av_frame_->data, bool(data->is_stereo),
+                 raw_frame_.data());
+  }
 
-      if (frame_count > 0) {
-        // assert_true(xma::GetPacketFrameOffset(packet) - 32 ==
-        //             split_frame_len_ - split_frame_len_partial_);
-      }
+  // TODO: Write function to regenerate decoder
+  // TODO: Be aware of subframe_skips & loops subframes skips
+  current_frame_remaining_subframes_ = 4 << data->is_stereo;
 
-      auto offset = stream.Copy(
-          xma_frame_.data() + 1 +
-              ((split_frame_len_partial_ + split_frame_padding_start_) / 8),
-          split_frame_len_ - split_frame_len_partial_);
-      assert_true(offset ==
-                  (split_frame_padding_start_ + split_frame_len_partial_) % 8);
-    } else {
-      if (data->input_buffer_read_offset % kBitsPerPacket == 0) {
-        // Invalid offset. Go ahead and set it.
-        int packet_number =
-            GetFramePacketNumber(current_input_buffer, current_input_size,
-                                 data->input_buffer_read_offset);
+  // Compute where to go next.
+  if (!packet_info.isLastFrameInPacket()) {
+    const uint32_t next_frame_offset =
+        (data->input_buffer_read_offset + bits_to_copy) % kBitsPerPacket;
 
-        if (packet_number == -1) {
-          return;
-        }
+    XELOGAPU("XmaContext {}: Index: {}/{} - Next frame offset: {}", id(),
+             (int32_t)packet_info.current_frame_, packet_info.frame_count_,
+             next_frame_offset);
 
-        auto offset =
-            xma::GetPacketFrameOffset(current_input_buffer +
-                                      kBytesPerPacket * packet_number) +
-            data->input_buffer_read_offset;
-        if (offset == -1) {
-          // No more frames.
-          SwapInputBuffer(data);
-          // TODO partial frames? end?
-          XELOGE("XmaContext {}: TODO partial frames? end?", id());
-          assert_always("TODO");
-          return;
-        } else {
-          data->input_buffer_read_offset = offset;
-        }
-      }
+    data->input_buffer_read_offset =
+        (packet_index * kBitsPerPacket) + next_frame_offset;
+    return;
+  }
 
-      if (!ValidFrameOffset(current_input_buffer, current_input_size,
-                            data->input_buffer_read_offset)) {
-        XELOGAPU("XmaContext {}: Error - Invalid read offset {}!", id(),
-                 data->input_buffer_read_offset);
+  const uint8_t* next_packet =
+      GetNextPacket(data, next_packet_index, current_input_packet_count);
+
+  if (!next_packet) {
+    // Error path
+    // Decoder probably should return error here
+    // Not sure what error code should be returned
+    // data->error_status = 4;
+    // data->output_buffer_valid = 0;
+    // return;
+  }
+
+  uint32_t next_input_offset = GetNextPacketReadOffset(
+      current_input_buffer, next_packet_index, current_input_packet_count);
+
+  if (next_input_offset == kBitsPerPacketHeader) {
+    SwapInputBuffer(data);
+    // We're at start of next buffer
+    // If it have any frame in this packet decoder should go to first frame in
+    // packet If it doesn't have any frame then it should immediatelly go to
+    // next packet
+    if (data->IsAnyInputBufferValid()) {
+      next_input_offset = xma::GetPacketFrameOffset(
+          memory()->TranslatePhysical(data->GetCurrentInputBufferAddress()));
+
+      if (next_input_offset > kMaxFrameSizeinBits) {
+        XELOGAPU(
+            "XmaContext {}: Next buffer contains no frames in packet! Frame "
+            "offset: {}",
+            id(), next_input_offset);
         SwapInputBuffer(data);
         return;
       }
-
-      // Where are we in the buffer (in XMA jargon)
-      std::tie(packet_idx, frame_idx) =
-          GetFrameNumber(current_input_buffer, current_input_size,
-                         data->input_buffer_read_offset);
-      // TODO handle
-      assert_true(packet_idx >= 0);
-      assert_true(frame_idx >= 0);
-      packet = current_input_buffer + packet_idx * kBytesPerPacket;
-      // frames that belong to this packet
-      std::tie(frame_count, frame_last_split) = GetPacketFrameCount(packet);
-      assert_true(frame_count >= 0);  // TODO end
-
-      PrepareDecoder(packet, data->sample_rate, bool(data->is_stereo));
-
-      // Current frame is split to next packet:
-      bool frame_is_split = frame_last_split && (frame_idx >= frame_count - 1);
-
-      stream =
-          BitStream(current_input_buffer, (packet_idx + 1) * kBitsPerPacket);
-      stream.SetOffset(data->input_buffer_read_offset);
-      // int frame_len;
-      // int frame_len_partial
-      split_frame_len_partial_ = static_cast<int>(stream.BitsRemaining());
-      if (split_frame_len_partial_ >= 15) {
-        split_frame_len_ = static_cast<int>(stream.Peek(15));
-      } else {
-        // assert_always();
-        split_frame_len_ = xma::kMaxFrameLength + 1;
-      }
-      assert_true(frame_is_split ==
-                  (split_frame_len_ > split_frame_len_partial_));
-
-      // TODO fix bitstream copy
-      std::memset(xma_frame_.data(), 0, xma_frame_.size());
-
-      {
-        int32_t bits_to_copy =
-            std::min(split_frame_len_, split_frame_len_partial_);
-
-        if (!stream.IsOffsetValid(bits_to_copy)) {
-          XELOGAPU(
-              "XmaContext {}: Error - Invalid amount of bits to copy! "
-              "split_frame_len: {}, split_partial: {}, offset_bits: {}",
-              id(), split_frame_len_, split_frame_len_partial_,
-              stream.offset_bits());
-          SwapInputBuffer(data);
-          return;
-        }
-        auto offset = stream.Copy(xma_frame_.data() + 1, bits_to_copy);
-        assert_true(offset < 8);
-        split_frame_padding_start_ = static_cast<uint8_t>(offset);
-      }
-
-      if (frame_is_split) {
-        // go to next xma packet of this stream
-        packets_skip_ = xma::GetPacketSkipCount(packet) + 1;
-        while (packets_skip_ > 0) {
-          packets_skip_--;
-          packet += kBytesPerPacket;
-          packet_idx++;
-          if (packet_idx >= current_input_packet_count) {
-            if (!reuse_input_buffer) {
-              // Last packet. Try setup once more.
-              reuse_input_buffer = TrySetupNextLoop(data, true);
-            }
-            if (!reuse_input_buffer) {
-              if (is_streaming) {
-                SwapInputBuffer(data);
-              } else {
-                is_stream_done_ = true;
-              }
-            }
-            return;
-          }
-        }
-        // TODO guest might read this:
-        data->input_buffer_read_offset = packet_idx * kBitsPerPacket;
-        continue;
-      }
+      XELOGAPU("XmaContext {}: Next buffer first frame starts at: {}", id(),
+               next_input_offset);
     }
+  }
+  data->input_buffer_read_offset = next_input_offset;
+  return;
+}
 
-    av_packet_->data = xma_frame_.data();
-    av_packet_->size = static_cast<int>(
-        1 + ((split_frame_padding_start_ + split_frame_len_) / 8) +
-        (((split_frame_padding_start_ + split_frame_len_) % 8) ? 1 : 0));
+//  Frame & Packet searching methods
 
-    auto padding_end = av_packet_->size * 8 -
-                       (8 + split_frame_padding_start_ + split_frame_len_);
-    assert_true(padding_end < 8);
-    xma_frame_[0] =
-        ((split_frame_padding_start_ & 7) << 5) | ((padding_end & 7) << 2);
+void XmaContext::UpdateLoopStatus(XMA_CONTEXT_DATA* data) {
+  if (data->loop_count == 0) {
+    return;
+  }
 
-    split_frame_len_ = 0;
-    split_frame_len_partial_ = 0;
-    split_frame_padding_start_ = 0;
+  const uint32_t loop_start = std::max(kBitsPerPacketHeader, data->loop_start);
+  const uint32_t loop_end = std::max(kBitsPerPacketHeader, data->loop_end);
 
-    auto ret = avcodec_send_packet(av_context_, av_packet_);
-    if (ret < 0) {
-      XELOGE("XmaContext {}: Error - Sending packet for decoding failed", id());
-      // TODO bail out
-      assert_always();
-    }
-    ret = avcodec_receive_frame(av_context_, av_frame_);
-    /*
-    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
-      // TODO AVERROR_EOF???
+  XELOGAPU("XmaContext {}: Looped Data: {} < {} (Start: {}) Remaining: {}",
+           id(), data->input_buffer_read_offset, data->loop_end,
+           data->loop_start, data->loop_count);
+
+  if (data->input_buffer_read_offset != loop_end) {
+    return;
+  }
+
+  data->input_buffer_read_offset = loop_start;
+
+  if (data->loop_count != 255) {
+    data->loop_count--;
+  }
+}
+
+const uint8_t* XmaContext::GetNextPacket(XMA_CONTEXT_DATA* data,
+                                         uint32_t next_packet_index,
+                                         uint32_t current_input_packet_count) {
+  if (next_packet_index < current_input_packet_count) {
+    return memory()->TranslatePhysical(data->GetCurrentInputBufferAddress()) +
+           next_packet_index * kBytesPerPacket;
+  }
+
+  const uint8_t next_buffer_index = data->current_buffer ^ 1;
+
+  if (!data->IsInputBufferValid(next_buffer_index)) {
+    return nullptr;
+  }
+
+  const uint32_t next_buffer_address =
+      data->GetInputBufferAddress(next_buffer_index);
+
+  if (!next_buffer_address) {
+    // This should never occur but there is always a chance
+    XELOGE(
+        "XmaContext {}: Buffer is marked as valid, but doesn't have valid "
+        "pointer!",
+        id());
+    return nullptr;
+  }
+
+  return memory()->TranslatePhysical(next_buffer_address);
+}
+
+const uint32_t XmaContext::GetNextPacketReadOffset(
+    uint8_t* buffer, uint32_t next_packet_index,
+    uint32_t current_input_packet_count) {
+  if (next_packet_index >= current_input_packet_count) {
+    return kBitsPerPacketHeader;
+  }
+
+  uint8_t* next_packet = buffer + (next_packet_index * kBytesPerPacket);
+  const uint32_t packet_frame_offset = xma::GetPacketFrameOffset(next_packet);
+
+  if (packet_frame_offset > kMaxFrameSizeinBits) {
+    const uint32_t offset = GetNextPacketReadOffset(
+        buffer, next_packet_index + 1, current_input_packet_count);
+    return offset;
+  }
+
+  const uint32_t new_input_buffer_offset =
+      (next_packet_index * kBitsPerPacket) + packet_frame_offset;
+
+  XELOGAPU("XmaContext {}: new offset: {} packet_offset: {} packet: {}/{}",
+           id(), new_input_buffer_offset, packet_frame_offset,
+           next_packet_index, current_input_packet_count);
+  return new_input_buffer_offset;
+}
+
+const uint32_t XmaContext::GetAmountOfBitsToRead(
+    const uint32_t remaining_stream_bits, const uint32_t frame_size) {
+  return std::min(remaining_stream_bits, frame_size);
+}
+
+uint32_t XmaContext::GetCurrentInputBufferSize(XMA_CONTEXT_DATA* data) {
+  return data->GetCurrentInputBufferPacketCount() * kBytesPerPacket;
+}
+
+uint8_t* XmaContext::GetCurrentInputBuffer(XMA_CONTEXT_DATA* data) {
+  return memory()->TranslatePhysical(data->GetCurrentInputBufferAddress());
+}
+
+const kPacketInfo XmaContext::GetPacketInfo(uint8_t* packet,
+                                            uint32_t frame_offset) {
+  kPacketInfo packet_info = {};
+
+  const uint32_t first_frame_offset = xma::GetPacketFrameOffset(packet);
+  BitStream stream(packet, kBitsPerPacket);
+  stream.SetOffset(first_frame_offset);
+
+  // Handling of splitted frame
+  if (frame_offset < first_frame_offset) {
+    packet_info.current_frame_ = 0;
+    packet_info.current_frame_size_ = first_frame_offset - frame_offset;
+  }
+
+  while (true) {
+    if (stream.BitsRemaining() < kBitsPerFrameHeader) {
       break;
-    else
-    */
-    if (ret < 0) {
-      XELOGE("XmaContext {}: Error - Decoding failed", id());
-      data->parser_error_status = 4;  // TODO(Gliniak): Find all parsing errors
-                                      // and create enumerator from them
-      SwapInputBuffer(data);
-      assert_always();
-      return;  // TODO bail out
     }
-    assert_true(ret == 0);
 
-    {
-      // copy over 1 frame
-      // update input buffer read offset
+    const uint64_t frame_size = stream.Peek(kBitsPerFrameHeader);
+    if (frame_size == xma::kMaxFrameLength) {
+      break;
+    }
 
-      // assert(decoded_consumed_samples_ + kSamplesPerFrame <=
-      //       current_frame_.size());
-      assert_true(av_context_->sample_fmt == AV_SAMPLE_FMT_FLTP);
-      // assert_true(frame_is_split == (frame_idx == -1));
+    if (stream.offset_bits() == frame_offset) {
+      packet_info.current_frame_ = packet_info.frame_count_;
+      packet_info.current_frame_size_ = (uint32_t)frame_size;
+    }
 
-      //			dump_raw(av_frame_, id());
-      ConvertFrame((const uint8_t**)av_frame_->data,
-                   bool(av_frame_->channels > 1), raw_frame_.data());
-      // decoded_consumed_samples_ += kSamplesPerFrame;
+    packet_info.frame_count_++;
 
-      auto byte_count = kBytesPerFrameChannel << data->is_stereo;
-      assert_true(output_remaining_bytes >= byte_count);
-      output_rb.Write(raw_frame_.data(), byte_count);
-      output_remaining_bytes -= byte_count;
-      data->output_buffer_write_offset = output_rb.write_offset() / 256;
+    if (frame_size > stream.BitsRemaining()) {
+      // Last frame.
+      break;
+    }
 
-      total_samples += id_ == 0 ? kSamplesPerFrame : 0;
+    stream.Advance(frame_size - 1);
 
-      uint32_t offset =
-          std::max(kBitsPerHeader, data->input_buffer_read_offset);
-      offset = static_cast<uint32_t>(
-          GetNextFrame(current_input_buffer, current_input_size, offset));
-
-      XELOGAPU(
-          "XmaContext {}: Next Offset: {} (Frame: {}/{} Packet: {}/{} Packet "
-          "Skip: {} - {})",
-          id(), offset, frame_idx, frame_count - 1, packet_idx,
-          current_input_packet_count, xma::GetPacketSkipCount(packet),
-          data->input_buffer_read_offset);
-      if (frame_idx + 1 >= frame_count) {
-        // Skip to next packet (no split frame)
-        packets_skip_ = xma::GetPacketSkipCount(packet) + 1;
-        while (packets_skip_ > 0) {
-          packets_skip_--;
-          packet_idx++;
-          if (packet_idx >= current_input_packet_count) {
-            if (!reuse_input_buffer) {
-              // Last packet. Try setup once more.
-              reuse_input_buffer = TrySetupNextLoop(data, true);
-            }
-            if (!reuse_input_buffer) {
-              if (is_streaming) {
-                SwapInputBuffer(data);
-                data->input_buffer_read_offset =
-                    GetPacketFirstFrameOffset(data);
-              } else {
-                is_stream_done_ = true;
-              }
-              if (output_rb.write_offset() == output_rb.read_offset()) {
-                data->output_buffer_valid = 0;
-              }
-            }
-            return;
-          }
-        }
-        packet = current_input_buffer + packet_idx * kBytesPerPacket;
-        // TODO(Gliniak): There might be an edge-case when we're in packet 26/27
-        // and GetPacketFrameOffset returns that there is no data in this packet
-        // aka. FrameOffset is set to more than 0x7FFF-0x20
-        offset =
-            xma::GetPacketFrameOffset(packet) + packet_idx * kBitsPerPacket;
-      }
-      if (offset == 0 || frame_idx == -1) {
-        // Next packet but we already skipped to it
-        if (packet_idx >= current_input_packet_count) {
-          // Buffer is fully used
-          if (!reuse_input_buffer) {
-            // Last packet. Try setup once more.
-            reuse_input_buffer = TrySetupNextLoop(data, true);
-          }
-          if (!reuse_input_buffer) {
-            if (is_streaming) {
-              SwapInputBuffer(data);
-            } else {
-              is_stream_done_ = true;
-            }
-          }
-          break;
-        }
-        offset =
-            xma::GetPacketFrameOffset(packet) + packet_idx * kBitsPerPacket;
-      }
-      // TODO buffer bounds check
-      assert_true(data->input_buffer_read_offset < offset);
-      data->input_buffer_read_offset = offset;
+    // Read the trailing bit to see if frames follow
+    if (stream.Read(1) == 0) {
+      break;
     }
   }
 
-  // assert_true((split_frame_len_ != 0) == (data->input_buffer_read_offset ==
-  // 0));
-
-  // The game will kick us again with a new output buffer later.
-  // It's important that we only invalidate this if we actually wrote to it!!
-  if (output_rb.write_offset() == output_rb.read_offset()) {
-    data->output_buffer_valid = 0;
+  if (xma::IsPacketXma2Type(packet)) {
+    const uint8_t xma2_frame_count = xma::GetPacketFrameCount(packet);
+    if (xma2_frame_count != packet_info.frame_count_) {
+      XELOGE(
+          "XmaContext {}: XMA2 packet header defines different amount of "
+          "frames than internally found! (Header: {} Found: {})",
+          id(), xma2_frame_count, packet_info.frame_count_);
+    }
   }
+  return packet_info;
 }
 
-uint32_t XmaContext::GetPacketFirstFrameOffset(const XMA_CONTEXT_DATA* data) {
-  uint32_t first_frame_offset = kBitsPerHeader;
-
-  uint8_t* in0 = data->input_buffer_0_valid
-                     ? memory()->TranslatePhysical(data->input_buffer_0_ptr)
-                     : nullptr;
-  uint8_t* in1 = data->input_buffer_1_valid
-                     ? memory()->TranslatePhysical(data->input_buffer_1_ptr)
-                     : nullptr;
-  uint8_t* current_input_buffer = data->current_buffer ? in1 : in0;
-
-  if (current_input_buffer) {
-    first_frame_offset = xma::GetPacketFrameOffset(current_input_buffer);
-  }
-  return first_frame_offset;
-}
-
-size_t XmaContext::GetNextFrame(uint8_t* block, size_t size,
-                                size_t bit_offset) {
-  // offset = xma::GetPacketFrameOffset(packet);
-  // TODO meh
-  // auto next_packet = bit_offset - bit_offset % kBitsPerPacket +
-  // kBitsPerPacket;
-  auto packet_idx = GetFramePacketNumber(block, size, bit_offset);
-
-  BitStream stream(block, size * 8);
-  stream.SetOffset(bit_offset);
-
-  if (stream.BitsRemaining() < 15) {
-    return 0;
+int16_t XmaContext::GetPacketNumber(size_t size, size_t bit_offset) {
+  if (bit_offset < kBitsPerPacketHeader) {
+    assert_always();
+    return -1;
   }
 
-  uint64_t len = stream.Read(15);
-  if ((len - 15) > stream.BitsRemaining()) {
-    // assert_always("TODO");
-    // *bit_offset = next_packet;
-    // return false;
-    // return next_packet;
-    return 0;
-  } else if (len >= xma::kMaxFrameLength) {
-    assert_always("TODO");
-    // *bit_offset = next_packet;
-    // return false;
-    return 0;
-    // return next_packet;
-  }
-
-  stream.Advance(len - (15 + 1));
-  // Read the trailing bit to see if frames follow
-  if (stream.Read(1) == 0) {
-    return 0;
-  }
-
-  bit_offset += len;
-  if (packet_idx < GetFramePacketNumber(block, size, bit_offset)) {
-    return 0;
-  }
-  return bit_offset;
-}
-
-int XmaContext::GetFramePacketNumber(uint8_t* block, size_t size,
-                                     size_t bit_offset) {
-  size *= 8;
-  if (bit_offset >= size) {
-    // Not good :(
+  if (bit_offset >= (size << 3)) {
     assert_always();
     return -1;
   }
@@ -842,101 +657,10 @@ int XmaContext::GetFramePacketNumber(uint8_t* block, size_t size,
   size_t byte_offset = bit_offset >> 3;
   size_t packet_number = byte_offset / kBytesPerPacket;
 
-  return (uint32_t)packet_number;
+  return (int16_t)packet_number;
 }
 
-std::tuple<int, int> XmaContext::GetFrameNumber(uint8_t* block, size_t size,
-                                                size_t bit_offset) {
-  auto packet_idx = GetFramePacketNumber(block, size, bit_offset);
-
-  if (packet_idx < 0 || (packet_idx + 1) * kBytesPerPacket > size) {
-    assert_always();
-    return {packet_idx, -2};
-  }
-
-  if (bit_offset == 0) {
-    return {packet_idx, -1};
-  }
-
-  uint8_t* packet = block + (packet_idx * kBytesPerPacket);
-  auto first_frame_offset = xma::GetPacketFrameOffset(packet);
-  BitStream stream(block, size * 8);
-  stream.SetOffset(packet_idx * kBitsPerPacket + first_frame_offset);
-
-  int frame_idx = 0;
-  while (true) {
-    if (stream.BitsRemaining() < 15) {
-      break;
-    }
-
-    if (stream.offset_bits() == bit_offset) {
-      break;
-    }
-
-    uint64_t size = stream.Read(15);
-    if ((size - 15) > stream.BitsRemaining()) {
-      // Last frame.
-      break;
-    } else if (size == 0x7FFF) {
-      // Invalid frame (and last of this packet)
-      break;
-    }
-
-    stream.Advance(size - (15 + 1));
-
-    // Read the trailing bit to see if frames follow
-    if (stream.Read(1) == 0) {
-      break;
-    }
-    frame_idx++;
-  }
-  return {packet_idx, frame_idx};
-}
-
-std::tuple<int, bool> XmaContext::GetPacketFrameCount(uint8_t* packet) {
-  auto first_frame_offset = xma::GetPacketFrameOffset(packet);
-  if (first_frame_offset > kBitsPerPacket - kBitsPerHeader) {
-    // frame offset is beyond packet end
-    return {0, false};
-  }
-
-  BitStream stream(packet, kBitsPerPacket);
-  stream.SetOffset(first_frame_offset);
-  int frame_count = 0;
-
-  while (true) {
-    if (stream.BitsRemaining() < 15) {
-      return {frame_count, false};
-    }
-
-    frame_count++;
-    uint64_t size = stream.Read(15);
-    if ((size - 15) > stream.BitsRemaining()) {
-      return {frame_count, true};
-    } else if (size == 0x7FFF) {
-      assert_always();
-      return {frame_count, true};
-    }
-
-    stream.Advance(size - (15 + 1));
-
-    if (stream.Read(1) == 0) {
-      return {frame_count, false};
-    }
-    // There is a case when frame ends EXACTLY at the end of packet.
-    // In such case we shouldn't increase frame count by additional not existing
-    // frame and don't mark it as splitted, but as a normal frame
-    if (!stream.BitsRemaining()) {
-      return {frame_count, false};
-    }
-  }
-}
-
-int XmaContext::PrepareDecoder(uint8_t* packet, int sample_rate,
-                               bool is_two_channel) {
-  // Sanity check: Packet metadata is always 1 for XMA2/0 for XMA
-  assert_true((packet[2] & 0x7) == 1 || (packet[2] & 0x7) == 0);
-
+int XmaContext::PrepareDecoder(int sample_rate, bool is_two_channel) {
   sample_rate = GetSampleRate(sample_rate);
 
   // Re-initialize the context with new sample rate and channels.
@@ -959,8 +683,40 @@ int XmaContext::PrepareDecoder(uint8_t* packet, int sample_rate,
   return 0;
 }
 
+void XmaContext::PreparePacket(const uint32_t frame_size,
+                               const uint32_t frame_padding) {
+  av_packet_->data = xma_frame_.data();
+  av_packet_->size =
+      static_cast<int>(1 + ((frame_padding + frame_size) / 8) +
+                       (((frame_padding + frame_size) % 8) ? 1 : 0));
+
+  auto padding_end = av_packet_->size * 8 - (8 + frame_padding + frame_size);
+  assert_true(padding_end < 8);
+  xma_frame_[0] = ((frame_padding & 7) << 5) | ((padding_end & 7) << 2);
+}
+
+bool XmaContext::DecodePacket(AVCodecContext* av_context,
+                              const AVPacket* av_packet, AVFrame* av_frame) {
+  auto ret = avcodec_send_packet(av_context, av_packet);
+  if (ret < 0) {
+    XELOGE("XmaContext {}: Error sending packet for decoding", id());
+    return false;
+  }
+  ret = avcodec_receive_frame(av_context, av_frame);
+
+  if (ret < 0) {
+    XELOGE("XmaContext {}: Error during decoding", id());
+    return false;
+  }
+  return true;
+}
+
 void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel,
                               uint8_t* output_buffer) {
+  if (!output_buffer) {
+    return;
+  }
+
   // Loop through every sample, convert and drop it into the output array.
   // If more than one channel, we need to interleave the samples from each
   // channel next to each other. Always saturate because FFmpeg output is
@@ -1037,4 +793,4 @@ void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel,
 }
 
 }  // namespace apu
-}  // namespace xe
+}  // namespace xe
\ No newline at end of file
diff --git a/src/xenia/apu/xma_context.h b/src/xenia/apu/xma_context.h
index ef187db31..89b63367c 100644
--- a/src/xenia/apu/xma_context.h
+++ b/src/xenia/apu/xma_context.h
@@ -2,7 +2,7 @@
  ******************************************************************************
  * Xenia : Xbox 360 Emulator Research Project                                 *
  ******************************************************************************
- * Copyright 2021 Ben Vanik. All rights reserved.                             *
+ * Copyright 2023 Ben Vanik. All rights reserved.                             *
  * Released under the BSD license - see LICENSE in the root for more details. *
  ******************************************************************************
  */
@@ -14,8 +14,9 @@
 #include <atomic>
 #include <mutex>
 #include <queue>
-// #include <vector>
 
+#include "xenia/base/bit_stream.h"
+#include "xenia/base/ring_buffer.h"
 #include "xenia/memory.h"
 #include "xenia/xbox.h"
 
@@ -76,7 +77,7 @@ struct XMA_CONTEXT_DATA {
 
   // DWORD 2
   uint32_t input_buffer_read_offset : 26;  // XMAGetInputBufferReadOffset
-  uint32_t unk_dword_2 : 6;                // ErrorStatus/ErrorSet (?)
+  uint32_t error_status : 6;               // ErrorStatus/ErrorSet (?)
 
   // DWORD 3
   uint32_t loop_start : 26;          // XMASetLoopData LoopStartOffset
@@ -96,7 +97,7 @@ struct XMA_CONTEXT_DATA {
   // DWORD 7
   uint32_t output_buffer_ptr;  // physical address
   // DWORD 8
-  uint32_t work_buffer_ptr;  // PtrOverlapAdd(?)
+  uint32_t work_buffer_ptr;  // Stores currently processed subframe
 
   // DWORD 9
   // +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead
@@ -115,10 +116,40 @@ struct XMA_CONTEXT_DATA {
   }
 
   void Store(void* ptr) {
+    // TODO: Compare current ptr state with this and see which dwords we should
+    // write
     xe::copy_and_swap(reinterpret_cast<uint32_t*>(ptr),
                       reinterpret_cast<const uint32_t*>(this),
                       sizeof(XMA_CONTEXT_DATA) / 4);
   }
+
+  bool IsInputBufferValid(uint8_t buffer_index) const {
+    return buffer_index == 0 ? input_buffer_0_valid : input_buffer_1_valid;
+  }
+
+  bool IsCurrentInputBufferValid() const {
+    return IsInputBufferValid(current_buffer);
+  }
+
+  bool IsAnyInputBufferValid() const {
+    return input_buffer_0_valid || input_buffer_1_valid;
+  }
+
+  const uint32_t GetInputBufferAddress(uint8_t buffer_index) const {
+    return buffer_index == 0 ? input_buffer_0_ptr : input_buffer_1_ptr;
+  }
+
+  const uint32_t GetCurrentInputBufferAddress() const {
+    return GetInputBufferAddress(current_buffer);
+  }
+
+  const uint32_t GetInputBufferPacketCount(uint8_t buffer_index) const {
+    return buffer_index == 0 ? input_buffer_0_packet_count
+                             : input_buffer_1_packet_count;
+  }
+  const uint32_t GetCurrentInputBufferPacketCount() const {
+    return GetInputBufferPacketCount(current_buffer);
+  }
 };
 static_assert_size(XMA_CONTEXT_DATA, 64);
 
@@ -130,11 +161,28 @@ struct Xma2ExtraData {
 static_assert_size(Xma2ExtraData, 34);
 #pragma pack(pop)
 
+struct kPacketInfo {
+  uint8_t frame_count_;
+  uint8_t current_frame_;
+  uint32_t current_frame_size_;
+
+  const bool isLastFrameInPacket() const {
+    return current_frame_ == frame_count_ - 1;
+  }
+};
+
+static constexpr int kIdToSampleRate[4] = {24000, 32000, 44100, 48000};
+
 class XmaContext {
  public:
   static const uint32_t kBytesPerPacket = 2048;
+  static const uint32_t kBytesPerPacketHeader = 4;
+  static const uint32_t kBytesPerPacketData =
+      kBytesPerPacket - kBytesPerPacketHeader;
+
   static const uint32_t kBitsPerPacket = kBytesPerPacket * 8;
-  static const uint32_t kBitsPerHeader = 32;
+  static const uint32_t kBitsPerPacketHeader = 32;
+  static const uint32_t kBitsPerFrameHeader = 15;
 
   static const uint32_t kBytesPerSample = 2;
   static const uint32_t kSamplesPerFrame = 512;
@@ -144,8 +192,11 @@ class XmaContext {
   static const uint32_t kBytesPerSubframeChannel =
       kSamplesPerSubframe * kBytesPerSample;
 
-  // static const uint32_t kOutputBytesPerBlock = 256;
-  // static const uint32_t kOutputMaxSizeBytes = 31 * kOutputBytesPerBlock;
+  static const uint32_t kOutputBytesPerBlock = 256;
+  static const uint32_t kOutputMaxSizeBytes = 31 * kOutputBytesPerBlock;
+
+  static const uint32_t kLastFrameMarker = 0x7FFF;
+  static const uint32_t kMaxFrameSizeinBits = 0x4000 - kBitsPerPacketHeader;
 
   explicit XmaContext();
   ~XmaContext();
@@ -171,30 +222,44 @@ class XmaContext {
 
  private:
   static void SwapInputBuffer(XMA_CONTEXT_DATA* data);
-  static bool TrySetupNextLoop(XMA_CONTEXT_DATA* data,
-                               bool ignore_input_buffer_offset);
-  static void NextPacket(XMA_CONTEXT_DATA* data);
+  // Convert sampling rate from ID to frequency.
   static int GetSampleRate(int id);
-  // Get the offset of the next frame. Does not traverse packets.
-  static size_t GetNextFrame(uint8_t* block, size_t size, size_t bit_offset);
   // Get the containing packet number of the frame pointed to by the offset.
-  static int GetFramePacketNumber(uint8_t* block, size_t size,
-                                  size_t bit_offset);
-  // Get the packet number and the index of the frame inside that packet
-  static std::tuple<int, int> GetFrameNumber(uint8_t* block, size_t size,
-                                             size_t bit_offset);
-  // Get the number of frames contained in the packet (including truncated) and
-  // if the last frame is split.
-  static std::tuple<int, bool> GetPacketFrameCount(uint8_t* packet);
+  static int16_t GetPacketNumber(size_t size, size_t bit_offset);
+
+  const kPacketInfo GetPacketInfo(uint8_t* packet, uint32_t frame_offset);
+
+  const uint32_t GetAmountOfBitsToRead(const uint32_t remaining_stream_bits,
+                                       const uint32_t frame_size);
+
+  const uint8_t* GetNextPacket(XMA_CONTEXT_DATA* data,
+                               uint32_t next_packet_index,
+                               uint32_t current_input_packet_count);
+
+  const uint32_t GetNextPacketReadOffset(uint8_t* buffer,
+                                         uint32_t next_packet_index,
+                                         uint32_t current_input_packet_count);
+
+  // Returns currently used buffer
+  uint8_t* GetCurrentInputBuffer(XMA_CONTEXT_DATA* data);
+
+  static uint32_t GetCurrentInputBufferSize(XMA_CONTEXT_DATA* data);
 
   // Convert sample format and swap bytes
   static void ConvertFrame(const uint8_t** samples, bool is_two_channel,
                            uint8_t* output_buffer);
 
-  bool ValidFrameOffset(uint8_t* block, size_t size_bytes,
-                        size_t frame_offset_bits);
   void Decode(XMA_CONTEXT_DATA* data);
-  int PrepareDecoder(uint8_t* packet, int sample_rate, bool is_two_channel);
+  void Consume(RingBuffer* output_rb, XMA_CONTEXT_DATA* data);
+
+  void UpdateLoopStatus(XMA_CONTEXT_DATA* data);
+  int PrepareDecoder(int sample_rate, bool is_two_channel);
+  void PreparePacket(const uint32_t frame_size, const uint32_t frame_padding);
+
+  RingBuffer PrepareOutputRingBuffer(XMA_CONTEXT_DATA* data);
+
+  bool DecodePacket(AVCodecContext* av_context, const AVPacket* av_packet,
+                    AVFrame* av_frame);
 
   // This method should be used ONLY when we're at the last packet of the stream
   // and we want to find offset in next buffer
@@ -204,39 +269,24 @@ class XmaContext {
 
   uint32_t id_ = 0;
   uint32_t guest_ptr_ = 0;
+
   xe_mutex lock_;
   volatile bool is_allocated_ = false;
   volatile bool is_enabled_ = false;
-  // bool is_dirty_ = true;
 
   // ffmpeg structures
   AVPacket* av_packet_ = nullptr;
   AVCodec* av_codec_ = nullptr;
   AVCodecContext* av_context_ = nullptr;
   AVFrame* av_frame_ = nullptr;
-  // uint32_t decoded_consumed_samples_ = 0; // TODO do this dynamically
-  // int decoded_idx_ = -1;
 
-  // bool partial_frame_saved_ = false;
-  // bool partial_frame_size_known_ = false;
-  // size_t partial_frame_total_size_bits_ = 0;
-  // size_t partial_frame_start_offset_bits_ = 0;
-  // size_t partial_frame_offset_bits_ = 0;  // blah internal don't use this
-  // std::vector<uint8_t> partial_frame_buffer_;
-  uint32_t packets_skip_ = 0;
-
-  bool is_stream_done_ = false;
-  // bool split_frame_pending_ = false;
-  uint32_t split_frame_len_ = 0;
-  uint32_t split_frame_len_partial_ = 0;
-  uint8_t split_frame_padding_start_ = 0;
+  std::array<uint8_t, kBytesPerPacketData * 2> input_buffer_;
   // first byte contains bit offset information
   std::array<uint8_t, 1 + 4096> xma_frame_;
-
-  // uint8_t* current_frame_ = nullptr;
-  // conversion buffer for 2 channel frame
   std::array<uint8_t, kBytesPerFrameChannel * 2> raw_frame_;
-  // std::vector<uint8_t> current_frame_ = std::vector<uint8_t>(0);
+
+  int32_t remaining_subframe_blocks_in_output_buffer_ = 0;
+  uint8_t current_frame_remaining_subframes_ = 0;
 };
 
 }  // namespace apu
diff --git a/src/xenia/apu/xma_helpers.h b/src/xenia/apu/xma_helpers.h
index 593d2d071..5140c8d0f 100644
--- a/src/xenia/apu/xma_helpers.h
+++ b/src/xenia/apu/xma_helpers.h
@@ -2,7 +2,7 @@
  ******************************************************************************
  * Xenia : Xbox 360 Emulator Research Project                                 *
  ******************************************************************************
- * Copyright 2021 Ben Vanik. All rights reserved.                             *
+ * Copyright 2023 Ben Vanik. All rights reserved.                             *
  * Released under the BSD license - see LICENSE in the root for more details. *
  ******************************************************************************
  */
@@ -20,29 +20,29 @@ namespace xma {
 
 static const uint32_t kMaxFrameLength = 0x7FFF;
 
-// Get number of frames that /begin/ in this packet.
-uint32_t GetPacketFrameCount(uint8_t* packet) {
-  return (uint8_t)(packet[0] >> 2);
+// Get number of frames that /begin/ in this packet. This is valid only for XMA2
+// packets
+const uint8_t GetPacketFrameCount(const uint8_t* packet) {
+  return packet[0] >> 2;
 }
 
+const uint8_t GetPacketMetadata(const uint8_t* packet) {
+  return packet[2] & 0x7;
+}
+
+const bool IsPacketXma2Type(const uint8_t* packet) {
+  return GetPacketMetadata(packet) == 1;
+}
+
+const uint8_t GetPacketSkipCount(const uint8_t* packet) { return packet[3]; }
+
 // Get the first frame offset in bits
-uint32_t GetPacketFrameOffset(uint8_t* packet) {
+uint32_t GetPacketFrameOffset(const uint8_t* packet) {
   uint32_t val = (uint16_t)(((packet[0] & 0x3) << 13) | (packet[1] << 5) |
                             (packet[2] >> 3));
-  // if (val > kBitsPerPacket - kBitsPerHeader) {
-  //   // There is no data in this packet
-  //   return -1;
-  // } else {
   return val + 32;
-  // }
 }
 
-uint32_t GetPacketMetadata(uint8_t* packet) {
-  return (uint8_t)(packet[2] & 0x7);
-}
-
-uint32_t GetPacketSkipCount(uint8_t* packet) { return (uint8_t)(packet[3]); }
-
 }  // namespace xma
 }  // namespace apu
 }  // namespace xe