New WIP audio decoder

2015-08-22 11:11:57 -05:00 · 2015-08-22 11:11:57 -05:00 · 0f9cd8cfb3
parent f2b2a22687
commit 0f9cd8cfb3
5 changed files with 245 additions and 116 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -19,3 +19,6 @@
 [submodule "build_tools"]
 	path = build_tools
 	url = https://github.com/xenia-project/build-tools.git
+[submodule "third_party/libav"]
+	path = third_party/libav
+	url = https://github.com/xenia-project/libav.git
--- a/src/xenia/apu/xma_context.cc
+++ b/src/xenia/apu/xma_context.cc
@@ -13,13 +13,17 @@
 #include <cstring>

 #include "xenia/apu/xma_decoder.h"
+#include "xenia/apu/xma_helpers.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/ring_buffer.h"
 #include "xenia/profiling.h"

 extern "C" {
 #include "libavcodec/avcodec.h"
+#include "libavcodec/xma2dec.h"
 #include "libavutil/channel_layout.h"
+
+extern AVCodec ff_xma2_decoder;
 }  // extern "C"

 // Credits for most of this code goes to:
@@ -50,14 +54,8 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
  memory_ = memory;
  guest_ptr_ = guest_ptr;

-  static bool avcodec_initialized = false;
-  if (!avcodec_initialized) {
-    avcodec_register_all();
-    avcodec_initialized = true;
-  }
-
  // Allocate important stuff.
-  codec_ = avcodec_find_decoder(AV_CODEC_ID_WMAPRO);
+  codec_ = &ff_xma2_decoder;
  if (!codec_) {
    return 1;
  }
@@ -91,7 +89,7 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {

  // Current frame stuff whatever
  // samples per frame * 2 max channels * output bytes
-  current_frame_ = new uint8_t[kSamplesPerFrame * 2 * 2];
+  current_frame_ = new uint8_t[kSamplesPerFrame * kBytesPerSample * 2];
  current_frame_pos_ = 0;
  frame_samples_size_ = 0;

@@ -119,11 +117,10 @@ void XmaContext::Enable() {
  auto context_ptr = memory()->TranslateVirtual(guest_ptr());
  XMA_CONTEXT_DATA data(context_ptr);

-  XELOGAPU(
-      "XmaContext: kicking context %d (%d/%d bytes)", id(),
-      (data.input_buffer_read_offset & ~0x7FF) / 8,
-      (data.input_buffer_0_packet_count + data.input_buffer_1_packet_count) *
-          kBytesPerPacket);
+  XELOGAPU("XmaContext: kicking context %d (%d/%d bits)", id(),
+           data.input_buffer_read_offset, (data.input_buffer_0_packet_count +
+                                           data.input_buffer_1_packet_count) *
+                                              kBytesPerPacket * 8);

  data.Store(context_ptr);

@@ -227,6 +224,26 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
    return;
  }

+  // XAudio Loops
+  // loop_count:
+  //  - XAUDIO2_MAX_LOOP_COUNT = 254
+  //  - XAUDIO2_LOOP_INFINITE = 255
+  // loop_start/loop_end are bit offsets to a specific frame
+  //assert_true(data->loop_count == 0);
+
+  // Translate pointers for future use.
+  uint8_t* in0 = data->input_buffer_0_valid
+                     ? memory()->TranslatePhysical(data->input_buffer_0_ptr)
+                     : nullptr;
+  uint8_t* in1 = data->input_buffer_1_valid
+                     ? memory()->TranslatePhysical(data->input_buffer_1_ptr)
+                     : nullptr;
+
+  size_t input_buffer_0_size =
+      data->input_buffer_0_packet_count * kBytesPerPacket;
+  size_t input_buffer_1_size =
+      data->input_buffer_1_packet_count * kBytesPerPacket;
+
  // Output buffers are in raw PCM samples, 256 bytes per block.
  // Output buffer is a ring buffer. We need to write from the write offset
  // to the read offset.
@@ -246,76 +263,175 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {

  // Decode until we can't write any more data.
  while (output_remaining_bytes > 0) {
-    // This'll copy audio samples into the output buffer.
-    // The samples need to be 2 bytes long!
-    // Copies one frame at a time, so keep calling this until size == 0.
-    int written_bytes = 0;
-    int decode_attempts_remaining = 3;
-
-    uint8_t work_buffer[kOutputMaxSizeBytes];
-    while (decode_attempts_remaining) {
-      size_t read_bytes = 0;
-      written_bytes =
-          DecodePacket(work_buffer, 0, output_remaining_bytes, &read_bytes);
-      if (written_bytes >= 0) {
-        // assert_true((written_bytes % 256) == 0);
-        auto written_bytes_rb = output_rb.Write(work_buffer, written_bytes);
-        assert_true(written_bytes == written_bytes_rb);
-
-        // Ok.
-        break;
-      } else if (read_bytes % 2048 == 0) {
-        // Sometimes the decoder will fail on a packet. I think it's
-        // looking for cross-packet frames and failing. If you run it again
-        // on the same packet it'll work though.
-        --decode_attempts_remaining;
-      } else {
-        // Failed in the middle of a packet, do not retry!
-        decode_attempts_remaining = 0;
-        break;
-      }
-    }
-
-    if (!decode_attempts_remaining) {
-      XELOGAPU("XmaContext: libav failed to decode packet (returned %.8X)",
-               -written_bytes);
-
-      // Failed out.
-      if (data->input_buffer_0_valid || data->input_buffer_1_valid) {
-        // There's new data available - maybe we'll be ok if we decode it?
-        written_bytes = 0;
-        DiscardPacket();
-      } else {
-        // No data and hosed - bail.
-        break;
-      }
-    }
-
-    data->output_buffer_write_offset = output_rb.write_offset() / 256;
-    output_remaining_bytes -= written_bytes;
-
-    // If we need more data and the input buffers have it, grab it.
-    if (written_bytes) {
-      // Haven't finished with current packet.
-      continue;
-    } else if (data->input_buffer_0_valid || data->input_buffer_1_valid) {
-      // Done with previous packet, so grab a new one.
-      int ret = StartPacket(data);
-      if (ret <= 0) {
-        // No more data (but may have prepared a packet)
-        data->input_buffer_0_valid = 0;
-        data->input_buffer_1_valid = 0;
-      }
-    } else {
-      // Decoder is out of data and there's no more to give.
+    if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
+      // Out of data.
      break;
    }
+
+    int num_channels = data->is_stereo ? 2 : 1;
+
+    // Check if we have part of a frame waiting (and the game hasn't jumped
+    // around)
+    if (current_frame_pos_ &&
+        last_input_read_pos_ == data->input_buffer_read_offset) {
+      size_t to_write = std::min(
+          output_remaining_bytes,
+          ((size_t)kBytesPerFrame * num_channels - current_frame_pos_));
+      output_rb.Write(current_frame_, to_write);
+
+      current_frame_pos_ += to_write;
+      if (current_frame_pos_ >= kBytesPerFrame * num_channels) {
+        current_frame_pos_ = 0;
+      }
+
+      data->output_buffer_write_offset = output_rb.write_offset() / 256;
+      output_remaining_bytes -= to_write;
+      continue;
+    }
+
+    int block_last_frame = 0;  // last frame in block?
+    int got_frame = 0;         // successfully decoded a frame?
+    int frame_size = 0;
+    packet_->data = in0;
+    packet_->size = data->input_buffer_0_packet_count * 2048;
+    PrepareDecoder(in0, data->input_buffer_0_packet_count * 2048,
+                   data->sample_rate, num_channels);
+    int len = xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame,
+                                &block_last_frame, &frame_size,
+                                data->input_buffer_read_offset);
+    if (block_last_frame) {
+      data->input_buffer_0_valid = 0;
+      data->input_buffer_1_valid = 0;
+      data->output_buffer_valid = 0;
+      continue;
+    }
+
+    if (len == AVERROR_EOF) {
+      // Screw this gtfo
+      data->input_buffer_0_valid = 0;
+      data->input_buffer_1_valid = 0;
+      data->output_buffer_valid = 0;
+
+      continue;
+    } else if (len < 0 || !got_frame) {
+      // Oh no! Skip the frame and hope everything works.
+      data->input_buffer_read_offset += frame_size;
+
+      continue;
+    }
+
+    XELOGD("LEN: %d (%x)", len, len);
+
+    data->input_buffer_read_offset += len;
+    last_input_read_pos_ = data->input_buffer_read_offset;
+
+    // Copy to the output buffer.
+    // Successfully decoded a frame.
+    size_t written_bytes = 0;
+    if (got_frame) {
+      // Validity checks.
+      if (decoded_frame_->nb_samples > kSamplesPerFrame) {
+        return;
+      } else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) {
+        return;
+      }
+
+      // Check the returned buffer size.
+      if (av_samples_get_buffer_size(NULL, context_->channels,
+                                     decoded_frame_->nb_samples,
+                                     context_->sample_fmt, 1) !=
+          context_->channels * decoded_frame_->nb_samples * sizeof(float)) {
+        return;
+      }
+
+      // Loop through every sample, convert and drop it into the output array.
+      // If more than one channel, the game wants the samples from each channel
+      // interleaved next to each other.
+      uint32_t o = 0;
+      for (int i = 0; i < decoded_frame_->nb_samples; i++) {
+        for (int j = 0; j < context_->channels; j++) {
+          // Select the appropriate array based on the current channel.
+          auto sample_array = reinterpret_cast<float*>(decoded_frame_->data[j]);
+
+          // Raw sample should be within [-1, 1].
+          // Clamp it, just in case.
+          float raw_sample = xe::saturate(sample_array[i]);
+
+          // Convert the sample and output it in big endian.
+          float scaled_sample = raw_sample * ((1 << 15) - 1);
+          int sample = static_cast<int>(scaled_sample);
+          xe::store_and_swap<uint16_t>(&current_frame_[o++ * 2],
+                                       sample & 0xFFFF);
+        }
+      }
+      current_frame_pos_ = 0;
+
+      if (output_remaining_bytes < kBytesPerFrame * num_channels) {
+        // Output buffer isn't big enough to store the entire frame! Write out a
+        // part of it.
+        current_frame_pos_ = output_remaining_bytes;
+        output_rb.Write(current_frame_, output_remaining_bytes);
+
+        written_bytes = output_remaining_bytes;
+      } else {
+        output_rb.Write(current_frame_, kBytesPerFrame * num_channels);
+
+        written_bytes = kBytesPerFrame * num_channels;
+      }
+    }
+
+    output_remaining_bytes -= written_bytes;
+    data->output_buffer_write_offset = output_rb.write_offset() / 256;
  }

  // The game will kick us again with a new output buffer later.
  data->output_buffer_valid = 0;
 }

+// Returns the index of the kBytesPerPacket-sized packet that holds bit
+// `bit_offset` of a `size`-byte block, or uint32_t(-1) if it is out of range.
+uint32_t XmaContext::GetFramePacketNumber(uint8_t* block, size_t size,
+                                          size_t bit_offset) {
+  const size_t block_bits = size * 8;
+  if (bit_offset < block_bits) {
+    // Bits -> bytes -> whole packets.
+    const size_t containing_byte = bit_offset >> 3;
+    return static_cast<uint32_t>(containing_byte / kBytesPerPacket);
+  }
+  // Offset lies beyond the end of the block.
+  assert_always();
+  return static_cast<uint32_t>(-1);
+}
+
+// Checks the packet header of `block` and reopens the libav context whenever
+// the requested sample rate / channel count differ from the current ones.
+// Returns 0 on success, 1 if the context could not be reopened.
+// NOTE(review): `size` is unused and `block` is read unchecked - confirm.
+int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
+                               int channels) {
+  // XMA2 packets are expected to carry a metadata value of 1.
+  assert_true((block[2] & 0x7) == 1);
+
+  const int resolved_rate = GetSampleRate(sample_rate);
+  const bool format_changed = context_->sample_rate != resolved_rate ||
+                              context_->channels != channels;
+  if (format_changed) {
+    // Close and reopen so the codec reallocates whatever data it needs.
+    // TODO(DrChat): Find a better way.
+    avcodec_close(context_);
+    context_->sample_rate = resolved_rate;
+    context_->channels = channels;
+    extra_data_.channel_mask =
+        channels == 2 ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
+    if (avcodec_open2(context_, codec_, nullptr) < 0) {
+      XELOGE("XmaContext: Failed to reopen libav context");
+      return 1;
+    }
+  }
+  av_frame_unref(decoded_frame_);
+  return 0;
+}
+
 int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
  // Translate pointers for future use.
  uint8_t* in0 = data->input_buffer_0_valid
@@ -340,41 +456,49 @@ int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
  // Total input size
  uint32_t input_size_bytes = input_size_0_bytes + input_size_1_bytes;

+  // Calculate the first frame offset we need to decode.
+  uint32_t frame_offset_bits = (data->input_buffer_read_offset % (2048 * 8));
+
  // Input read offset is in bits. Typically starts at 32 (4 bytes).
  // "Sequence" offset - used internally for WMA Pro decoder.
  // Just the read offset.
-  uint32_t seq_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8;
-  uint32_t input_remaining_bytes = input_size_bytes - seq_offset_bytes;
+  // NOTE: Read offset may not be at the first frame in a packet!
+  uint32_t packet_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8;
+  if (packet_offset_bytes % 2048 != 0) {
+    packet_offset_bytes -= packet_offset_bytes % 2048;
+  }
+  uint32_t input_remaining_bytes = input_size_bytes - packet_offset_bytes;

-  if (seq_offset_bytes < input_size_bytes) {
-    // Setup input offset and input buffer.
-    uint32_t input_offset_bytes = seq_offset_bytes;
-    auto input_buffer = in0;
-
-    if (seq_offset_bytes >= input_size_0_bytes && input_size_1_bytes) {
-      // Size overlap, select input buffer 1.
-      // TODO(DrChat): This needs testing.
-      input_offset_bytes -= input_size_0_bytes;
-      input_buffer = in1;
-    }
-
-    // Still have data to read.
-    auto packet = input_buffer + input_offset_bytes;
-    assert_true(input_offset_bytes % 2048 == 0);
-    PreparePacket(packet, seq_offset_bytes, kBytesPerPacket, sample_rate,
-                  channels);
-    data->input_buffer_read_offset += kBytesPerPacket * 8;
-
-    input_remaining_bytes -= kBytesPerPacket;
-    if (input_remaining_bytes <= 0) {
-      // Used the last of the data but prepared a packet.
-      return 0;
-    }
-  } else {
+  if (packet_offset_bytes >= input_size_bytes) {
    // No more data available and no packet prepared.
    return -1;
  }

+  // Setup input offset and input buffer.
+  uint32_t input_offset_bytes = packet_offset_bytes;
+  auto input_buffer = in0;
+
+  if (packet_offset_bytes >= input_size_0_bytes && input_size_1_bytes) {
+    // Size overlap, select input buffer 1.
+    // TODO(DrChat): This needs testing.
+    input_offset_bytes -= input_size_0_bytes;
+    input_buffer = in1;
+  }
+
+  // Still have data to read.
+  auto packet = input_buffer + input_offset_bytes;
+  assert_true(input_offset_bytes % 2048 == 0);
+  PreparePacket(packet, packet_offset_bytes, kBytesPerPacket, sample_rate,
+                channels);
+
+  data->input_buffer_read_offset += kBytesPerPacket * 8;
+
+  input_remaining_bytes -= kBytesPerPacket;
+  if (input_remaining_bytes <= 0) {
+    // Used the last of the data but prepared a packet.
+    return 0;
+  }
+
  return input_remaining_bytes;
 }

@@ -390,15 +514,11 @@ int XmaContext::PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
    return 1;
  }

-  std::memcpy(packet_data_, input, size);
+  // Packet metadata is always 1 for XMA2
+  assert_true((input[2] & 0x7) == 1);

-  // Modify the packet header so it's WMAPro compatible.
-  auto int_packet_data = reinterpret_cast<int*>(packet_data_);
-  *int_packet_data =
-      (((seq_offset & 0x7800) | 0x400) >> 7) | (*int_packet_data & 0xFFFEFF08);
-
-  packet_->data = packet_data_;
-  packet_->size = kBytesPerPacket;
+  packet_->data = input;
+  packet_->size = (int)size;

  // Re-initialize the context with new sample rate and channels.
  if (context_->sample_rate != sample_rate || context_->channels != channels) {
--- a/src/xenia/apu/xma_context.h
+++ b/src/xenia/apu/xma_context.h
@@ -60,8 +60,8 @@ struct XMA_CONTEXT_DATA {
  uint32_t input_buffer_1_packet_count : 12;  // XMASetInputBuffer1, number of
                                              // 2KB packets. Max 4095 packets.
                                              // These packets form a block.
-  uint32_t loop_subframe_end : 2;             // +12bit, XMASetLoopData
-  uint32_t unk_dword_1_a : 3;                 // ? might be loop_subframe_skip
+  uint32_t loop_subframe_start : 2;             // +12bit, XMASetLoopData
+  uint32_t loop_subframe_end : 3;               // +14bit, XMASetLoopData
  uint32_t loop_subframe_skip : 3;            // +17bit, XMASetLoopData might be
                                              // subframe_decode_count
  uint32_t subframe_decode_count : 4;  // +20bit might be subframe_skip_count
@@ -91,7 +91,7 @@ struct XMA_CONTEXT_DATA {
  // DWORD 7
  uint32_t output_buffer_ptr;  // physical address
  // DWORD 8
-  uint32_t overlap_add_ptr;  // PtrOverlapAdd(?)
+  uint32_t work_buffer_ptr;  // PtrOverlapAdd(?)

  // DWORD 9
  // +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead
@@ -133,6 +133,7 @@ class XmaContext {
  static const uint32_t kBytesPerSample = 2;
  static const uint32_t kSamplesPerFrame = 512;
  static const uint32_t kSamplesPerSubframe = 128;
+  static const uint32_t kBytesPerFrame = kSamplesPerFrame * kBytesPerSample;
  static const uint32_t kBytesPerSubframe =
      kSamplesPerSubframe * kBytesPerSample;

@@ -165,6 +166,10 @@ class XmaContext {
  static int GetSampleRate(int id);

  void DecodePackets(XMA_CONTEXT_DATA* data);
+  uint32_t GetFramePacketNumber(uint8_t* block, size_t size, size_t bit_offset);
+  int PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
+                     int channels);
+
  int StartPacket(XMA_CONTEXT_DATA* data);

  int PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
@@ -189,11 +194,11 @@ class XmaContext {
  AVPacket* packet_ = nullptr;
  WmaProExtraData extra_data_;

+  // If we didn't finish writing a frame to the output buffer, this is the offset.
  size_t current_frame_pos_ = 0;
+  uint32_t last_input_read_pos_ = 0; // Last seen read buffer pos
  uint8_t* current_frame_ = nullptr;
  uint32_t frame_samples_size_ = 0;
-
-  uint8_t packet_data_[kBytesPerPacket];
 };

 }  // namespace apu
--- a/src/xenia/kernel/xboxkrnl_audio_xma.cc
+++ b/src/xenia/kernel/xboxkrnl_audio_xma.cc
@@ -171,7 +171,7 @@ SHIM_CALL XMASetLoopData_shim(PPCContext* ppc_context,
  context.loop_end = loop_data->loop_end;
  context.loop_count = loop_data->loop_count;
  context.loop_subframe_end = loop_data->loop_subframe_end;
-  context.loop_subframe_skip = loop_data->loop_subframe_end;
+  context.loop_subframe_skip = loop_data->loop_subframe_skip;

  context.Store(SHIM_MEM_ADDR(context_ptr));

--- a/third_party/libav
+++ b/third_party/libav
@@ -0,0 +1 @@
+Subproject commit 4752bdcdbaf61afc933e667a016bca4eb389ac21
				`@ -0,0 +1 @@`
				`Subproject commit 4752bdcdbaf61afc933e667a016bca4eb389ac21`