Audio now works a bit better, but is still broken :/

2015-05-28 02:15:30 -07:00 · 2015-05-28 02:15:30 -07:00 · e419d314b3
parent a93325434a
commit e419d314b3
4 changed files with 173 additions and 138 deletions
--- a/src/xenia/apu/audio_system.cc
+++ b/src/xenia/apu/audio_system.cc
@ -181,7 +181,7 @@ void AudioSystem::WorkerThreadMain() {

 void AudioSystem::DecoderThreadMain() {
  while (decoder_running_) {
-    // Wait for the fence
+    // Wait for a kick from WriteRegister.
    decoder_fence_.Wait();

    // Check to see if we're supposed to exit
@ -192,128 +192,14 @@ void AudioSystem::DecoderThreadMain() {
    // Okay, let's loop through XMA contexts to find ones we need to decode!
    for (uint32_t n = 0; n < kXmaContextCount; n++) {
      XMAContext& context = xma_context_array_[n];
-      if (!context.lock.try_lock()) {
-        // Someone else has the lock.
-        continue;
-      }
-
-      // Skip unused contexts
-      if (!context.in_use) {
+      if (context.in_use) {
+        context.lock.lock();
+        auto context_ptr = memory()->TranslateVirtual(context.guest_ptr);
+        XMAContextData data(context_ptr);
+        ProcessXmaContext(context, data);
+        data.Store(context_ptr);
        context.lock.unlock();
-        continue;
      }
-
-      uint8_t* ptr = memory()->TranslatePhysical(context.guest_ptr);
-      auto data = XMAContextData(ptr);
-
-      if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
-        // A buffer is valid. Run the decoder!
-
-        // Reset valid flags
-        data.input_buffer_0_valid = 0;
-        data.input_buffer_1_valid = 0;
-        data.output_buffer_valid = 0;
-
-        // Translate pointers for future use.
-        auto in0 = memory()->TranslatePhysical(data.input_buffer_0_ptr);
-        auto in1 = memory()->TranslatePhysical(data.input_buffer_1_ptr);
-        auto out = memory()->TranslatePhysical(data.output_buffer_ptr);
-
-        // What I see:
-        // XMA outputs 2 bytes per sample
-        // 512 samples per frame (128 per subframe)
-        // Max output size is data.output_buffer_block_count * 256
-
-        // This decoder is fed packets (max 4095 per buffer)
-        // Packets contain "some" frames
-        // 32bit header (big endian)
-
-        // Frames are the smallest thing the SPUs can decode.
-        // They usually can span packets (libav handles this)
-
-        // Sample rates (data.sample_rate):
-        // 0 - 24 kHz ?
-        // 1 - 32 kHz
-        // 2 - 44.1 kHz ?
-        // 3 - 48 kHz ?
-
-        // SPUs also support stereo decoding. (data.is_stereo)
-        int retries_remaining = 2;
-        while (retries_remaining) {
-          // Initial check - see if we've finished with the input
-          // TODO - Probably need to move this, I think it might skip the very
-          // last packet (see the call to PreparePacket)
-          size_t input_size = (data.input_buffer_0_block_count +
-                               data.input_buffer_1_block_count) *
-                              2048;
-          size_t input_offset = (data.input_buffer_read_offset / 8 - 4);
-          size_t input_remaining = input_size - input_offset;
-          if (input_offset >= input_size) {
-            // We're finished. Break.
-            break;
-          }
-
-          // Now check the output buffer.
-          size_t output_size = data.output_buffer_block_count * 256;
-          size_t output_offset = data.output_buffer_write_offset * 256;
-          size_t output_remaining = output_size - output_offset;
-          if (output_remaining == 0) {
-            // Can't write any more data. Break.
-            // The game will kick us again with a new output buffer later.
-            break;
-          }
-
-          // This'll copy audio samples into the output buffer.
-          // The samples need to be 2 bytes long!
-          // Copies one frame at a time, so keep calling this until size == 0
-          int read = context.decoder->DecodePacket(out, output_offset,
-                                                   output_remaining);
-          if (read < 0) {
-            // Sometimes the decoder will fail on a packet. I think it's
-            // looking for cross-packet frames and failing. If you run it again
-            // on the same packet it'll work though.
-            XELOGAPU("APU failed to decode packet (returned %.8X)", -read);
-            --retries_remaining;
-            continue;
-          }
-
-          if (read == 0) {
-            // Select sample rate.
-            int sample_rate = 0;
-            if (data.sample_rate == 0) {
-              sample_rate = 24000;
-            } else if (data.sample_rate == 1) {
-              sample_rate = 32000;
-            } else if (data.sample_rate == 2) {
-              sample_rate = 44100;
-            } else if (data.sample_rate == 3) {
-              sample_rate = 48000;
-            }
-
-            // Channels
-            int channels = 1;
-            if (data.is_stereo == 1) {
-              channels = 2;
-            }
-
-            // New packet time.
-            // TODO: Select input buffer 1 if necessary.
-            auto packet = in0 + input_offset;
-            context.decoder->PreparePacket(packet, 2048, sample_rate, channels);
-            input_offset += 2048;
-          }
-
-          output_offset += read;
-
-          // Copy the variables we changed back to the context.
-          data.input_buffer_read_offset = (input_offset + 4) * 8;
-          data.output_buffer_write_offset = output_offset / 256;
-        }
-
-        data.Store(ptr);
-      }
-
-      context.lock.unlock();
    }
  }
 }
@ -437,6 +323,140 @@ void AudioSystem::UnregisterClient(size_t index) {
  ResetEvent(client_wait_handles_[index]);
 }

+void AudioSystem::ProcessXmaContext(XMAContext& context, XMAContextData& data) {
+  if (!context.in_use) {
+    // Skip unused contexts.
+    return;
+  }
+
+  SCOPE_profile_cpu_f("apu");
+
+  // Translate pointers for future use.
+  uint8_t* in0 = data.input_buffer_0_valid
+                     ? memory()->TranslatePhysical(data.input_buffer_0_ptr)
+                     : nullptr;
+  uint8_t* in1 = data.input_buffer_1_valid
+                     ? memory()->TranslatePhysical(data.input_buffer_1_ptr)
+                     : nullptr;
+  uint8_t* out = memory()->TranslatePhysical(data.output_buffer_ptr);
+
+  // What I see:
+  // XMA outputs 2 bytes per sample
+  // 512 samples per frame (128 per subframe)
+  // Max output size is data.output_buffer_block_count * 256
+
+  // This decoder is fed packets (max 4095 per buffer)
+  // Packets contain "some" frames
+  // 32bit header (big endian)
+
+  // Frames are the smallest thing the SPUs can decode.
+  // They usually can span packets (libav handles this)
+
+  // Sample rates (data.sample_rate):
+  // 0 - 24 kHz ?
+  // 1 - 32 kHz
+  // 2 - 44.1 kHz ?
+  // 3 - 48 kHz ?
+
+  // SPUs also support stereo decoding. (data.is_stereo)
+  while (data.output_buffer_valid) {
+    // Check the output buffer - we cannot decode anything else if it's
+    // unavailable.
+    // Output buffers are in frames.
+    uint32_t output_size_bytes = data.output_buffer_block_count * 256;
+    uint32_t output_offset_bytes = data.output_buffer_write_offset * 256;
+    uint32_t output_remaining_bytes = output_size_bytes - output_offset_bytes;
+    if (!output_remaining_bytes) {
+      // Can't write any more data. Break.
+      // The game will kick us again with a new output buffer later.
+      data.output_buffer_valid = 0;
+      break;
+    }
+
+    // This'll copy audio samples into the output buffer.
+    // The samples need to be 2 bytes long!
+    // Copies one frame at a time, so keep calling this until size == 0
+    int read_bytes = 0;
+    int decode_attempts_remaining = 3;
+    while (decode_attempts_remaining) {
+      read_bytes = context.decoder->DecodePacket(out, output_offset_bytes,
+                                                 output_remaining_bytes);
+      if (read_bytes >= 0) {
+        // Ok.
+        break;
+      } else {
+        // Sometimes the decoder will fail on a packet. I think it's
+        // looking for cross-packet frames and failing. If you run it again
+        // on the same packet it'll work though.
+        XELOGAPU("APU failed to decode packet (returned %.8X)", -read_bytes);
+        --decode_attempts_remaining;
+      }
+    }
+    if (!decode_attempts_remaining) {
+      // Failed out.
+      if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
+        // There's new data available - maybe we'll be ok if we decode it?
+        read_bytes = 0;
+        context.decoder->DiscardPacket();
+      } else {
+        // No data and hosed - bail.
+        break;
+      }
+    }
+    data.output_buffer_write_offset += uint32_t(read_bytes) / 256;
+
+    // If we need more data and the input buffers have it, grab it.
+    if (read_bytes) {
+      // Still outputting.
+      continue;
+    } else if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
+      // Done with previous packet, so grab a new one.
+      int sample_rate = 0;
+      if (data.sample_rate == 0) {
+        sample_rate = 24000;
+      } else if (data.sample_rate == 1) {
+        sample_rate = 32000;
+      } else if (data.sample_rate == 2) {
+        sample_rate = 44100;
+      } else if (data.sample_rate == 3) {
+        sample_rate = 48000;
+      }
+      int channels = data.is_stereo ? 2 : 1;
+
+      // See if we've finished with the input
+      // TODO - Probably need to move this, I think it might skip the very
+      // last packet (see the call to PreparePacket)
+      // Block count is in frames, so expand by
+      // samples_per_frame*bytes_per_sample*bits_per_byte.
+      uint32_t input_size_bytes =
+          (data.input_buffer_0_block_count + data.input_buffer_1_block_count) *
+          2048;
+      // Input read offset is in bits. Typically starts at 32 (4 bytes).
+      uint32_t input_offset_bytes =
+          (data.input_buffer_read_offset & ~0x7FF) / 8;
+      if (input_offset_bytes < input_size_bytes) {
+        // Still have data to read.
+        // TODO: Select input buffer 1 if necessary.
+        auto packet = in0 + input_offset_bytes;
+        context.decoder->PreparePacket(packet, 2048, sample_rate, channels);
+        data.input_buffer_read_offset += 2048 * 8;
+        if (input_offset_bytes + 2048 >= input_size_bytes) {
+          // Used the last of the data.
+          data.input_buffer_0_valid = 0;
+          data.input_buffer_1_valid = 0;
+        }
+      } else {
+        // No more data available (for now).
+        data.input_buffer_0_valid = 0;
+        data.input_buffer_1_valid = 0;
+      }
+    } else {
+      // Decoder is out of data and there's no more to give.
+      break;
+    }
+  }
+}
+
 // free60 may be useful here, however it looks like it's using a different
 // piece of hardware:
 // https://github.com/Free60Project/libxenon/blob/master/libxenon/drivers/xenon_sound/sound.c
@ -483,7 +503,7 @@ void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) {
    // Basically, this kicks the SPU and says "hey, decode that audio!"
    // XMAEnableContext

-    // The context ID is a bit in the range of the entire context array
+    // The context ID is a bit in the range of the entire context array.
    for (int i = 0; value && i < 32; ++i) {
      if (value & 1) {
        uint32_t context_id = i + (r - 0x1940) / 4 * 32;
@ -495,10 +515,10 @@ void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) {

        XELOGAPU(
            "AudioSystem: kicking context %d (%d/%d bytes)", context_id,
-            data.input_buffer_read_offset,
+            data.input_buffer_read_offset / 8,
            data.input_buffer_0_block_count * XMAContextData::kBytesPerBlock);

-        // Reset valid flags so our audio decoder knows to process this one
+        // Reset valid flags so our audio decoder knows to process this one.
        data.input_buffer_0_valid = data.input_buffer_0_ptr != 0;
        data.input_buffer_1_valid = data.input_buffer_1_ptr != 0;
        data.output_buffer_write_offset = 0;
@ -519,26 +539,34 @@ void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) {
      if (value & 1) {
        uint32_t context_id = i + (r - 0x1A40) / 4 * 32;
        XELOGAPU("AudioSystem: set context lock %d", context_id);
-
-        // TODO: Find the correct way to lock/unlock this.
-        // I thought we could lock it here, unlock it in the kick but that
-        // doesn't seem to work
-        XMAContext& context = xma_context_array_[context_id];
      }
      value >>= 1;
    }
+
+    // Signal the decoder thread to start processing.
+    decoder_fence_.Signal();
  } else if (r >= 0x1A80 && r <= 0x1A80 + 9 * 4) {
    // Context clear command.
    // This will reset the given hardware contexts.
    for (int i = 0; value && i < 32; ++i) {
      if (value & 1) {
        uint32_t context_id = i + (r - 0x1A80) / 4 * 32;
+        XMAContext& context = xma_context_array_[context_id];
        XELOGAPU("AudioSystem: reset context %d", context_id);

-        // TODO(benvanik): something?
        uint32_t guest_ptr =
            registers_.xma_context_array_ptr + context_id * kXmaContextSize;
-        auto context_ptr = memory()->TranslateVirtual(guest_ptr);
+        context.lock.lock();
+        auto context_ptr = memory()->TranslateVirtual(context.guest_ptr);
+        XMAContextData data(context_ptr);
+
+        context.decoder->DiscardPacket();
+        data.input_buffer_0_valid = 0;
+        data.input_buffer_1_valid = 0;
+        data.output_buffer_valid = 0;
+
+        data.Store(context_ptr);
+        context.lock.unlock();
      }
      value >>= 1;
    }
--- a/src/xenia/apu/audio_system.h
+++ b/src/xenia/apu/audio_system.h
@ -111,6 +111,9 @@ struct XMAContextData {
 static_assert(sizeof(XMAContextData) == 4 * 10, "Must be packed");

 class AudioSystem {
+ protected:
+  struct XMAContext;
+
 public:
  virtual ~AudioSystem();

@ -147,6 +150,8 @@ class AudioSystem {
  void WorkerThreadMain();
  void DecoderThreadMain();

+  void ProcessXmaContext(XMAContext& context, XMAContextData& data);
+
  static uint64_t MMIOReadRegisterThunk(AudioSystem* as, uint32_t addr) {
    return as->ReadRegister(addr);
  }
--- a/src/xenia/kernel/xboxkrnl_threading.cc
+++ b/src/xenia/kernel/xboxkrnl_threading.cc
@ -889,21 +889,23 @@ SHIM_CALL KeWaitForMultipleObjects_shim(PPCContext* ppc_state,

  X_STATUS result = X_STATUS_SUCCESS;

-  std::vector<object_ref<XObject>> objects(count);
+  std::vector<object_ref<XObject>> objects;
  for (uint32_t n = 0; n < count; n++) {
    uint32_t object_ptr_ptr = SHIM_MEM_32(objects_ptr + n * 4);
    void* object_ptr = SHIM_MEM_ADDR(object_ptr_ptr);
-    objects[n] = XObject::GetNativeObject<XObject>(state, object_ptr);
-    if (!objects[n]) {
+    auto object_ref = XObject::GetNativeObject<XObject>(state, object_ptr);
+    if (!object_ref) {
      SHIM_SET_RETURN_32(X_STATUS_INVALID_PARAMETER);
      return;
    }
+    objects.push_back(std::move(object_ref));
  }

  uint64_t timeout = timeout_ptr ? SHIM_MEM_64(timeout_ptr) : 0;
-  result = XObject::WaitMultiple(
-      count, reinterpret_cast<XObject**>(objects.data()), wait_type,
-      wait_reason, processor_mode, alertable, timeout_ptr ? &timeout : nullptr);
+  result = XObject::WaitMultiple(uint32_t(objects.size()),
+                                 reinterpret_cast<XObject**>(objects.data()),
+                                 wait_type, wait_reason, processor_mode,
+                                 alertable, timeout_ptr ? &timeout : nullptr);

  SHIM_SET_RETURN_32(result);
 }
--- a/src/xenia/kernel/xobject.cc
+++ b/src/xenia/kernel/xobject.cc
@ -146,7 +146,7 @@ X_STATUS XObject::WaitMultiple(uint32_t count, XObject** objects,
                               uint32_t wait_type, uint32_t wait_reason,
                               uint32_t processor_mode, uint32_t alertable,
                               uint64_t* opt_timeout) {
-  void** wait_handles = (void**)alloca(sizeof(void*) * count);
+  HANDLE* wait_handles = (HANDLE*)alloca(sizeof(HANDLE) * count);
  for (uint32_t n = 0; n < count; n++) {
    wait_handles[n] = objects[n]->GetWaitHandle();
    assert_not_null(wait_handles[n]);