New WIP audio decoder

2015-08-22 11:11:57 -05:00 · 2015-08-22 11:11:57 -05:00 · 0f9cd8cfb3
parent f2b2a22687
commit 0f9cd8cfb3
5 changed files with 245 additions and 116 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -19,3 +19,6 @@
 [submodule "build_tools"]
 	path = build_tools
 	url = https://github.com/xenia-project/build-tools.git
 [submodule "third_party/libav"]
 	path = third_party/libav
 	url = https://github.com/xenia-project/libav.git
--- a/src/xenia/apu/xma_context.cc
+++ b/src/xenia/apu/xma_context.cc
@ -13,13 +13,17 @@
 #include <cstring>
 #include "xenia/apu/xma_decoder.h"
 #include "xenia/apu/xma_helpers.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/ring_buffer.h"
 #include "xenia/profiling.h"
 extern "C" {
 #include "libavcodec/avcodec.h"
 #include "libavcodec/xma2dec.h"
 #include "libavutil/channel_layout.h"
 extern AVCodec ff_xma2_decoder;
 }  // extern "C"
 // Credit for most of this code goes to:
@ -50,14 +54,8 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
  memory_ = memory;
  guest_ptr_ = guest_ptr;
  static bool avcodec_initialized = false;
  if (!avcodec_initialized) {
    avcodec_register_all();
    avcodec_initialized = true;
  }
  // Allocate important stuff.
-  codec_ = avcodec_find_decoder(AV_CODEC_ID_WMAPRO);
+  codec_ = &ff_xma2_decoder;
  if (!codec_) {
    return 1;
  }
@ -91,7 +89,7 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
  // Current frame stuff whatever
  // samples per frame * 2 max channels * output bytes
-  current_frame_ = new uint8_t[kSamplesPerFrame * 2 * 2];
+  current_frame_ = new uint8_t[kSamplesPerFrame * kBytesPerSample * 2];
  current_frame_pos_ = 0;
  frame_samples_size_ = 0;
@ -119,11 +117,10 @@ void XmaContext::Enable() {
  auto context_ptr = memory()->TranslateVirtual(guest_ptr());
  XMA_CONTEXT_DATA data(context_ptr);
-  XELOGAPU(
+  XELOGAPU("XmaContext: kicking context %d (%d/%d bits)", id(),
-      "XmaContext: kicking context %d (%d/%d bytes)", id(),
+           data.input_buffer_read_offset, (data.input_buffer_0_packet_count +
-      (data.input_buffer_read_offset & ~0x7FF) / 8,
+                                           data.input_buffer_1_packet_count) *
-      (data.input_buffer_0_packet_count + data.input_buffer_1_packet_count) *
+                                              kBytesPerPacket * 8);
          kBytesPerPacket);
  data.Store(context_ptr);
@ -227,6 +224,26 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
    return;
  }
  // XAudio Loops
  // loop_count:
  //  - XAUDIO2_MAX_LOOP_COUNT = 254
  //  - XAUDIO2_LOOP_INFINITE = 255
  // loop_start/loop_end are bit offsets to a specific frame
  //assert_true(data->loop_count == 0);
  // Translate pointers for future use.
  uint8_t* in0 = data->input_buffer_0_valid
                     ? memory()->TranslatePhysical(data->input_buffer_0_ptr)
                     : nullptr;
  uint8_t* in1 = data->input_buffer_1_valid
                     ? memory()->TranslatePhysical(data->input_buffer_1_ptr)
                     : nullptr;
  size_t input_buffer_0_size =
      data->input_buffer_0_packet_count * kBytesPerPacket;
  size_t input_buffer_1_size =
      data->input_buffer_1_packet_count * kBytesPerPacket;
  // Output buffers are in raw PCM samples, 256 bytes per block.
  // Output buffer is a ring buffer. We need to write from the write offset
  // to the read offset.
@ -246,76 +263,175 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
  // Decode until we can't write any more data.
  while (output_remaining_bytes > 0) {
-    // This'll copy audio samples into the output buffer.
+    if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
-    // The samples need to be 2 bytes long!
+      // Out of data.
    // Copies one frame at a time, so keep calling this until size == 0.
    int written_bytes = 0;
    int decode_attempts_remaining = 3;
    uint8_t work_buffer[kOutputMaxSizeBytes];
    while (decode_attempts_remaining) {
      size_t read_bytes = 0;
      written_bytes =
          DecodePacket(work_buffer, 0, output_remaining_bytes, &read_bytes);
      if (written_bytes >= 0) {
        // assert_true((written_bytes % 256) == 0);
        auto written_bytes_rb = output_rb.Write(work_buffer, written_bytes);
        assert_true(written_bytes == written_bytes_rb);
        // Ok.
        break;
      } else if (read_bytes % 2048 == 0) {
        // Sometimes the decoder will fail on a packet. I think it's
        // looking for cross-packet frames and failing. If you run it again
        // on the same packet it'll work though.
        --decode_attempts_remaining;
      } else {
        // Failed in the middle of a packet, do not retry!
        decode_attempts_remaining = 0;
        break;
      }
    }
    if (!decode_attempts_remaining) {
      XELOGAPU("XmaContext: libav failed to decode packet (returned %.8X)",
               -written_bytes);
      // Failed out.
      if (data->input_buffer_0_valid || data->input_buffer_1_valid) {
        // There's new data available - maybe we'll be ok if we decode it?
        written_bytes = 0;
        DiscardPacket();
      } else {
        // No data and hosed - bail.
        break;
      }
    }
    data->output_buffer_write_offset = output_rb.write_offset() / 256;
    output_remaining_bytes -= written_bytes;
    // If we need more data and the input buffers have it, grab it.
    if (written_bytes) {
      // Haven't finished with current packet.
      continue;
    } else if (data->input_buffer_0_valid || data->input_buffer_1_valid) {
      // Done with previous packet, so grab a new one.
      int ret = StartPacket(data);
      if (ret <= 0) {
        // No more data (but may have prepared a packet)
        data->input_buffer_0_valid = 0;
        data->input_buffer_1_valid = 0;
      }
    } else {
      // Decoder is out of data and there's no more to give.
      break;
    }
    int num_channels = data->is_stereo ? 2 : 1;
    // Check if we have part of a frame waiting (and the game hasn't jumped
    // around)
    if (current_frame_pos_ &&
        last_input_read_pos_ == data->input_buffer_read_offset) {
      size_t to_write = std::min(
          output_remaining_bytes,
          ((size_t)kBytesPerFrame * num_channels - current_frame_pos_));
      output_rb.Write(current_frame_, to_write);
      current_frame_pos_ += to_write;
      if (current_frame_pos_ >= kBytesPerFrame * num_channels) {
        current_frame_pos_ = 0;
      }
      data->output_buffer_write_offset = output_rb.write_offset() / 256;
      output_remaining_bytes -= to_write;
      continue;
    }
    int block_last_frame = 0;  // last frame in block?
    int got_frame = 0;         // successfully decoded a frame?
    int frame_size = 0;
    packet_->data = in0;
    packet_->size = data->input_buffer_0_packet_count * 2048;
    PrepareDecoder(in0, data->input_buffer_0_packet_count * 2048,
                   data->sample_rate, num_channels);
    int len = xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame,
                                &block_last_frame, &frame_size,
                                data->input_buffer_read_offset);
    if (block_last_frame) {
      data->input_buffer_0_valid = 0;
      data->input_buffer_1_valid = 0;
      data->output_buffer_valid = 0;
      continue;
    }
    if (len == AVERROR_EOF) {
      // Decoder reported end of stream: invalidate the input and output
      // buffers.
      data->input_buffer_0_valid = 0;
      data->input_buffer_1_valid = 0;
      data->output_buffer_valid = 0;
      continue;
    } else if (len < 0 || !got_frame) {
      // Oh no! Skip the frame and hope everything works.
      data->input_buffer_read_offset += frame_size;
      continue;
    }
    XELOGD("LEN: %d (%x)", len, len);
    data->input_buffer_read_offset += len;
    last_input_read_pos_ = data->input_buffer_read_offset;
    // Copy to the output buffer.
    // Successfully decoded a frame.
    size_t written_bytes = 0;
    if (got_frame) {
      // Validity checks.
      if (decoded_frame_->nb_samples > kSamplesPerFrame) {
        return;
      } else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) {
        return;
      }
      // Check the returned buffer size.
      if (av_samples_get_buffer_size(NULL, context_->channels,
                                     decoded_frame_->nb_samples,
                                     context_->sample_fmt, 1) !=
          context_->channels * decoded_frame_->nb_samples * sizeof(float)) {
        return;
      }
      // Loop through every sample, convert and drop it into the output array.
      // If more than one channel, the game wants the samples from each channel
      // interleaved next to each other.
      uint32_t o = 0;
      for (int i = 0; i < decoded_frame_->nb_samples; i++) {
        for (int j = 0; j < context_->channels; j++) {
          // Select the appropriate array based on the current channel.
          auto sample_array = reinterpret_cast<float*>(decoded_frame_->data[j]);
          // Raw sample should be within [-1, 1].
          // Clamp it, just in case.
          float raw_sample = xe::saturate(sample_array[i]);
          // Convert the sample and output it in big endian.
          float scaled_sample = raw_sample * ((1 << 15) - 1);
          int sample = static_cast<int>(scaled_sample);
          xe::store_and_swap<uint16_t>(&current_frame_[o++ * 2],
                                       sample & 0xFFFF);
        }
      }
      current_frame_pos_ = 0;
      if (output_remaining_bytes < kBytesPerFrame * num_channels) {
        // Output buffer isn't big enough to store the entire frame! Write out a
        // part of it.
        current_frame_pos_ = output_remaining_bytes;
        output_rb.Write(current_frame_, output_remaining_bytes);
        written_bytes = output_remaining_bytes;
      } else {
        output_rb.Write(current_frame_, kBytesPerFrame * num_channels);
        written_bytes = kBytesPerFrame * num_channels;
      }
    }
    output_remaining_bytes -= written_bytes;
    data->output_buffer_write_offset = output_rb.write_offset() / 256;
  }
  // The game will kick us again with a new output buffer later.
  data->output_buffer_valid = 0;
 }
 // Maps a bit offset inside an input block to the index of the packet that
 // contains it.
 //
 // block:      start of the input block (not read by this function).
 // size:       size of the block in bytes.
 // bit_offset: offset into the block, in bits.
 //
 // Returns the packet index, or (uint32_t)-1 after firing assert_always() when
 // bit_offset lies at or beyond the end of the block.
 uint32_t XmaContext::GetFramePacketNumber(uint8_t* block, size_t size,
                                           size_t bit_offset) {
  // The block size is given in bytes; compare against the offset in bits.
  const size_t total_bits = size * 8;
  if (bit_offset < total_bits) {
    // Bits -> bytes, then bytes -> whole packets.
    const size_t offset_in_bytes = bit_offset >> 3;
    return static_cast<uint32_t>(offset_in_bytes / kBytesPerPacket);
  }

  // Offset is out of range for this block.
  assert_always();
  return static_cast<uint32_t>(-1);
 }
 // Makes sure the libav context matches the stream's sample rate and channel
 // count, reopening the codec when either differs, then releases the previously
 // decoded frame.
 //
 // block:       start of the input data; only its third byte is inspected.
 // size:        size of the input data in bytes (not read by this function).
 // sample_rate: sample rate identifier, translated through GetSampleRate().
 // channels:    requested channel count.
 //
 // Returns 0 on success, 1 if the codec could not be reopened.
 int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
                                int channels) {
  // Sanity check: the low three bits of the third byte (packet metadata) are
  // always 1 for XMA2.
  assert_true((block[2] & 0x7) == 1);

  const int target_rate = GetSampleRate(sample_rate);

  const bool format_matches =
      context_->sample_rate == target_rate && context_->channels == channels;
  if (!format_matches) {
    // The codec has to be closed and reopened so it reallocates whatever
    // data depends on the sample rate / channel count.
    // TODO(DrChat): Find a better way.
    avcodec_close(context_);

    context_->sample_rate = target_rate;
    context_->channels = channels;
    if (channels == 2) {
      extra_data_.channel_mask = AV_CH_LAYOUT_STEREO;
    } else {
      extra_data_.channel_mask = AV_CH_LAYOUT_MONO;
    }

    const int open_result = avcodec_open2(context_, codec_, nullptr);
    if (open_result < 0) {
      XELOGE("XmaContext: Failed to reopen libav context");
      return 1;
    }
  }

  // Drop any reference held from the previous decode.
  av_frame_unref(decoded_frame_);
  return 0;
 }
 int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
  // Translate pointers for future use.
  uint8_t* in0 = data->input_buffer_0_valid
@ -340,41 +456,49 @@ int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
  // Total input size
  uint32_t input_size_bytes = input_size_0_bytes + input_size_1_bytes;
  // Calculate the first frame offset we need to decode.
  uint32_t frame_offset_bits = (data->input_buffer_read_offset % (2048 * 8));
  // Input read offset is in bits. Typically starts at 32 (4 bytes).
  // "Sequence" offset - used internally for WMA Pro decoder.
  // Just the read offset.
-  uint32_t seq_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8;
+  // NOTE: Read offset may not be at the first frame in a packet!
-  uint32_t input_remaining_bytes = input_size_bytes - seq_offset_bytes;
+  uint32_t packet_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8;
  if (packet_offset_bytes % 2048 != 0) {
    packet_offset_bytes -= packet_offset_bytes % 2048;
  }
  uint32_t input_remaining_bytes = input_size_bytes - packet_offset_bytes;
-  if (seq_offset_bytes < input_size_bytes) {
+  if (packet_offset_bytes >= input_size_bytes) {
    // Setup input offset and input buffer.
    uint32_t input_offset_bytes = seq_offset_bytes;
    auto input_buffer = in0;
    if (seq_offset_bytes >= input_size_0_bytes && input_size_1_bytes) {
      // Size overlap, select input buffer 1.
      // TODO(DrChat): This needs testing.
      input_offset_bytes -= input_size_0_bytes;
      input_buffer = in1;
    }
    // Still have data to read.
    auto packet = input_buffer + input_offset_bytes;
    assert_true(input_offset_bytes % 2048 == 0);
    PreparePacket(packet, seq_offset_bytes, kBytesPerPacket, sample_rate,
                  channels);
    data->input_buffer_read_offset += kBytesPerPacket * 8;
    input_remaining_bytes -= kBytesPerPacket;
    if (input_remaining_bytes <= 0) {
      // Used the last of the data but prepared a packet.
      return 0;
    }
  } else {
    // No more data available and no packet prepared.
    return -1;
  }
  // Setup input offset and input buffer.
  uint32_t input_offset_bytes = packet_offset_bytes;
  auto input_buffer = in0;
  if (packet_offset_bytes >= input_size_0_bytes && input_size_1_bytes) {
    // Size overlap, select input buffer 1.
    // TODO(DrChat): This needs testing.
    input_offset_bytes -= input_size_0_bytes;
    input_buffer = in1;
  }
  // Still have data to read.
  auto packet = input_buffer + input_offset_bytes;
  assert_true(input_offset_bytes % 2048 == 0);
  PreparePacket(packet, packet_offset_bytes, kBytesPerPacket, sample_rate,
                channels);
  data->input_buffer_read_offset += kBytesPerPacket * 8;
  input_remaining_bytes -= kBytesPerPacket;
  if (input_remaining_bytes <= 0) {
    // Used the last of the data but prepared a packet.
    return 0;
  }
  return input_remaining_bytes;
 }
@ -390,15 +514,11 @@ int XmaContext::PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
    return 1;
  }
-  std::memcpy(packet_data_, input, size);
+  // Packet metadata is always 1 for XMA2
  assert_true((input[2] & 0x7) == 1);
-  // Modify the packet header so it's WMAPro compatible.
+  packet_->data = input;
-  auto int_packet_data = reinterpret_cast<int*>(packet_data_);
+  packet_->size = (int)size;
  *int_packet_data =
      (((seq_offset & 0x7800) | 0x400) >> 7) | (*int_packet_data & 0xFFFEFF08);
  packet_->data = packet_data_;
  packet_->size = kBytesPerPacket;
  // Re-initialize the context with new sample rate and channels.
  if (context_->sample_rate != sample_rate || context_->channels != channels) {
--- a/src/xenia/apu/xma_context.h
+++ b/src/xenia/apu/xma_context.h
@ -60,8 +60,8 @@ struct XMA_CONTEXT_DATA {
  uint32_t input_buffer_1_packet_count : 12;  // XMASetInputBuffer1, number of
                                              // 2KB packets. Max 4095 packets.
                                              // These packets form a block.
-  uint32_t loop_subframe_end : 2;             // +12bit, XMASetLoopData
+  uint32_t loop_subframe_start : 2;             // +12bit, XMASetLoopData
-  uint32_t unk_dword_1_a : 3;                 // ? might be loop_subframe_skip
+  uint32_t loop_subframe_end : 3;               // +14bit, XMASetLoopData
  uint32_t loop_subframe_skip : 3;            // +17bit, XMASetLoopData might be
                                              // subframe_decode_count
  uint32_t subframe_decode_count : 4;  // +20bit might be subframe_skip_count
@ -91,7 +91,7 @@ struct XMA_CONTEXT_DATA {
  // DWORD 7
  uint32_t output_buffer_ptr;  // physical address
  // DWORD 8
-  uint32_t overlap_add_ptr;  // PtrOverlapAdd(?)
+  uint32_t work_buffer_ptr;  // PtrOverlapAdd(?)
  // DWORD 9
  // +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead
@ -133,6 +133,7 @@ class XmaContext {
  static const uint32_t kBytesPerSample = 2;
  static const uint32_t kSamplesPerFrame = 512;
  static const uint32_t kSamplesPerSubframe = 128;
  static const uint32_t kBytesPerFrame = kSamplesPerFrame * kBytesPerSample;
  static const uint32_t kBytesPerSubframe =
      kSamplesPerSubframe * kBytesPerSample;
@ -165,6 +166,10 @@ class XmaContext {
  static int GetSampleRate(int id);
  void DecodePackets(XMA_CONTEXT_DATA* data);
  uint32_t GetFramePacketNumber(uint8_t* block, size_t size, size_t bit_offset);
  int PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
                     int channels);
  int StartPacket(XMA_CONTEXT_DATA* data);
  int PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
@ -189,11 +194,11 @@ class XmaContext {
  AVPacket* packet_ = nullptr;
  WmaProExtraData extra_data_;
  // If we didn't finish writing a frame to the output buffer, this is the offset.
  size_t current_frame_pos_ = 0;
  uint32_t last_input_read_pos_ = 0; // Last seen read buffer pos
  uint8_t* current_frame_ = nullptr;
  uint32_t frame_samples_size_ = 0;
  uint8_t packet_data_[kBytesPerPacket];
 };
 }  // namespace apu
--- a/src/xenia/kernel/xboxkrnl_audio_xma.cc
+++ b/src/xenia/kernel/xboxkrnl_audio_xma.cc
@ -171,7 +171,7 @@ SHIM_CALL XMASetLoopData_shim(PPCContext* ppc_context,
  context.loop_end = loop_data->loop_end;
  context.loop_count = loop_data->loop_count;
  context.loop_subframe_end = loop_data->loop_subframe_end;
-  context.loop_subframe_skip = loop_data->loop_subframe_end;
+  context.loop_subframe_skip = loop_data->loop_subframe_skip;
  context.Store(SHIM_MEM_ADDR(context_ptr));
--- a/third_party/libav
+++ b/third_party/libav
@ -0,0 +1 @@
 Subproject commit 4752bdcdbaf61afc933e667a016bca4eb389ac21
		`@ -0,0 +1 @@`
							`Subproject commit 4752bdcdbaf61afc933e667a016bca4eb389ac21`