diff --git a/libxenia.vcxproj b/libxenia.vcxproj index 8d1f3e02b..f1c28be46 100644 --- a/libxenia.vcxproj +++ b/libxenia.vcxproj @@ -16,7 +16,6 @@ - @@ -24,6 +23,8 @@ + + @@ -266,7 +267,6 @@ - @@ -276,6 +276,8 @@ + + diff --git a/libxenia.vcxproj.filters b/libxenia.vcxproj.filters index 48681189a..ef8268443 100644 --- a/libxenia.vcxproj.filters +++ b/libxenia.vcxproj.filters @@ -709,9 +709,6 @@ src\xenia\kernel - - src\xenia\apu - src\xenia\cpu\backend\x64 @@ -775,6 +772,12 @@ src\xenia\cpu\compiler\passes + + src\xenia\apu + + + src\xenia\apu + @@ -1404,9 +1407,6 @@ third_party\xbyak\xbyak - - src\xenia\apu - src\xenia\debug\proto @@ -1500,6 +1500,12 @@ src\xenia\cpu\backend + + src\xenia\apu + + + src\xenia\apu + diff --git a/src/xenia/apu/apu.h b/src/xenia/apu/apu.h index c9f2931b3..25fc8f2bf 100644 --- a/src/xenia/apu/apu.h +++ b/src/xenia/apu/apu.h @@ -13,6 +13,7 @@ #include #include "xenia/apu/audio_system.h" +#include "xenia/apu/xma_decoder.h" namespace xe { class Emulator; diff --git a/src/xenia/apu/audio_decoder.h b/src/xenia/apu/audio_decoder.h deleted file mode 100644 index b62a7308d..000000000 --- a/src/xenia/apu/audio_decoder.h +++ /dev/null @@ -1,65 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_APU_AUDIO_DECODER_H_ -#define XENIA_APU_AUDIO_DECODER_H_ - -#include "xenia/apu/audio_system.h" - -// XMA audio format: -// From research, XMA appears to be based on WMA Pro with -// a few (very slight) modifications. -// XMA2 is fully backwards-compatible with XMA1. 
- -// Helpful resources: -// https://github.com/koolkdev/libertyv/blob/master/libav_wrapper/xma2dec.c -// http://hcs64.com/mboard/forum.php?showthread=14818 -// https://github.com/hrydgard/minidx9/blob/master/Include/xma2defs.h - -// Forward declarations -struct AVCodec; -struct AVCodecContext; -struct AVFrame; -struct AVPacket; - -namespace xe { -namespace apu { - -class AudioDecoder { - public: - AudioDecoder(); - ~AudioDecoder(); - - int Initialize(); - - int PreparePacket(uint8_t* input, size_t seq_offset, size_t size, - int sample_rate, int channels); - void DiscardPacket(); - - int DecodePacket(uint8_t* output, size_t offset, size_t size); - - private: - // libav structures - AVCodec* codec_; - AVCodecContext* context_; - AVFrame* decoded_frame_; - AVPacket* packet_; - - size_t current_frame_pos_; - uint8_t* current_frame_; - uint32_t frame_samples_size_; - - uint8_t packet_data_[XMAContextData::kBytesPerPacket]; -}; - -} // namespace apu -} // namespace xe - - -#endif // XENIA_APU_AUDIO_DECODER_H_ \ No newline at end of file diff --git a/src/xenia/apu/audio_system.cc b/src/xenia/apu/audio_system.cc index 06290cd23..75cf8addc 100644 --- a/src/xenia/apu/audio_system.cc +++ b/src/xenia/apu/audio_system.cc @@ -10,7 +10,6 @@ #include "xenia/apu/audio_system.h" #include "xenia/apu/audio_driver.h" -#include "xenia/apu/audio_decoder.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/ring_buffer.h" @@ -21,10 +20,6 @@ #include "xenia/kernel/objects/xthread.h" #include "xenia/profiling.h" -extern "C" { -#include "libavutil/log.h" -} - // As with normal Microsoft, there are like twelve different ways to access // the audio APIs. Early games use XMA*() methods almost exclusively to touch // decoders. 
Later games use XAudio*() and direct memory writes to the XMA @@ -36,20 +31,6 @@ extern "C" { // The XMA*() functions just manipulate the audio system in the guest context // and let the normal AudioSystem handling take it, to prevent duplicate // implementations. They can be found in xboxkrnl_audio_xma.cc -// -// XMA details: -// https://devel.nuclex.org/external/svn/directx/trunk/include/xma2defs.h -// https://github.com/gdawg/fsbext/blob/master/src/xma_header.h -// -// XAudio2 uses XMA under the covers, and seems to map with the same -// restrictions of frame/subframe/etc: -// https://msdn.microsoft.com/en-us/library/windows/desktop/microsoft.directx_sdk.xaudio2.xaudio2_buffer(v=vs.85).aspx -// -// XMA contexts are 64b in size and tight bitfields. They are in physical -// memory not usually available to games. Games will use MmMapIoSpace to get -// the 64b pointer in user memory so they can party on it. If the game doesn't -// do this, it's likely they are either passing the context to XAudio or -// using the XMA* functions. namespace xe { namespace apu { @@ -59,8 +40,7 @@ using namespace xe::cpu; AudioSystem::AudioSystem(Emulator* emulator) : emulator_(emulator), memory_(emulator->memory()), - worker_running_(false), - decoder_running_(false) { + worker_running_(false) { std::memset(clients_, 0, sizeof(clients_)); for (size_t i = 0; i < kMaximumClientCount; ++i) { unused_clients_.push(i); @@ -80,47 +60,9 @@ AudioSystem::~AudioSystem() { CloseHandle(shutdown_event_); } -void av_log_callback(void *avcl, int level, const char *fmt, va_list va) { - StringBuffer buff; - buff.AppendVarargs(fmt, va); - - xe::log_line('i', "libav: %s", buff.GetString()); -} - X_STATUS AudioSystem::Setup() { processor_ = emulator_->processor(); - // Let the processor know we want register access callbacks. 
- emulator_->memory()->AddVirtualMappedRange( - 0x7FEA0000, 0xFFFF0000, 0x0000FFFF, this, - reinterpret_cast(MMIOReadRegisterThunk), - reinterpret_cast(MMIOWriteRegisterThunk)); - - // Setup XMA contexts ptr. - registers_.xma_context_array_ptr = memory()->SystemHeapAlloc( - sizeof(XMAContextData) * kXmaContextCount, 256, kSystemHeapPhysical); - // Add all contexts to the free list. - for (int i = kXmaContextCount - 1; i >= 0; --i) { - uint32_t ptr = registers_.xma_context_array_ptr + i * sizeof(XMAContextData); - - XMAContext& context = xma_context_array_[i]; - - // Initialize it - context.guest_ptr = ptr; - context.in_use = false; - context.kicked = false; - - // Create a new decoder per context - // Needed because some data needs to be persisted across calls - // TODO: Need to destroy this on class destruction - context.decoder = new AudioDecoder(); - context.decoder->Initialize(); - } - registers_.next_context = 1; - - // Setup libav logging callback - av_log_set_callback(av_log_callback); - worker_running_ = true; worker_thread_ = kernel::object_ref(new kernel::XHostThread( @@ -131,16 +73,6 @@ X_STATUS AudioSystem::Setup() { worker_thread_->set_name("Audio Worker"); worker_thread_->Create(); - decoder_running_ = true; - decoder_thread_ = - kernel::object_ref(new kernel::XHostThread( - emulator()->kernel_state(), 128 * 1024, 0, [this]() { - DecoderThreadMain(); - return 0; - })); - decoder_thread_->set_name("Audio Decoder"); - decoder_thread_->Create(); - return X_STATUS_SUCCESS; } @@ -197,34 +129,6 @@ void AudioSystem::WorkerThreadMain() { // TODO(benvanik): call module API to kill? } -void AudioSystem::DecoderThreadMain() { - while (decoder_running_) { - // Wait for a kick from WriteRegister. - //decoder_fence_.Wait(); - - // Check to see if we're supposed to exit - if (!decoder_running_) { - break; - } - - // Okay, let's loop through XMA contexts to find ones we need to decode! 
- for (uint32_t n = 0; n < kXmaContextCount; n++) { - XMAContext& context = xma_context_array_[n]; - if (context.in_use && context.kicked) { - context.lock.lock(); - context.kicked = false; - - auto context_ptr = memory()->TranslateVirtual(context.guest_ptr); - XMAContextData data(context_ptr); - ProcessXmaContext(context, data); - data.Store(context_ptr); - - context.lock.unlock(); - } - } - } -} - void AudioSystem::Initialize() {} void AudioSystem::Shutdown() { @@ -232,66 +136,6 @@ void AudioSystem::Shutdown() { SetEvent(shutdown_event_); worker_thread_->Wait(0, 0, 0, nullptr); worker_thread_.reset(); - - decoder_running_ = false; - decoder_fence_.Signal(); - worker_thread_.reset(); - - memory()->SystemHeapFree(registers_.xma_context_array_ptr); -} - -uint32_t AudioSystem::AllocateXmaContext() { - std::lock_guard lock(lock_); - - for (uint32_t n = 0; n < kXmaContextCount; n++) { - XMAContext& context = xma_context_array_[n]; - if (!context.in_use) { - context.in_use = true; - return context.guest_ptr; - } - } - - return 0; -} - -void AudioSystem::ReleaseXmaContext(uint32_t guest_ptr) { - std::lock_guard lock(lock_); - - // Find it in the list. - for (uint32_t n = 0; n < kXmaContextCount; n++) { - XMAContext& context = xma_context_array_[n]; - if (context.guest_ptr == guest_ptr) { - // Found it! - // Lock it in case the decoder thread is working on it now - context.lock.lock(); - - context.in_use = false; - auto context_ptr = memory()->TranslateVirtual(guest_ptr); - std::memset(context_ptr, 0, sizeof(XMAContextData)); // Zero it. 
- context.decoder->DiscardPacket(); - - context.lock.unlock(); - break; - } - } -} - -bool AudioSystem::BlockOnXmaContext(uint32_t guest_ptr, bool poll) { - std::lock_guard lock(lock_); - for (uint32_t n = 0; n < kXmaContextCount; n++) { - XMAContext& context = xma_context_array_[n]; - if (context.guest_ptr == guest_ptr) { - if (!context.lock.try_lock()) { - if (poll) { - return false; - } - context.lock.lock(); - } - context.lock.unlock(); - return true; - } - } - return true; } X_STATUS AudioSystem::RegisterClient(uint32_t callback, uint32_t callback_arg, @@ -353,304 +197,5 @@ void AudioSystem::UnregisterClient(size_t index) { assert_true(wait_result == WAIT_TIMEOUT); } -void AudioSystem::ProcessXmaContext(XMAContext& context, XMAContextData& data) { - SCOPE_profile_cpu_f("apu"); - - // What I see: - // XMA outputs 2 bytes per sample - // 512 samples per frame (128 per subframe) - // Max output size is data.output_buffer_block_count * 256 - - // This decoder is fed packets (max 4095 per buffer) - // Packets contain "some" frames - // 32bit header (big endian) - - // Frames are the smallest thing the SPUs can decode. - // They usually can span packets (libav handles this) - - // Sample rates (data.sample_rate): - // 0 - 24 kHz ? - // 1 - 32 kHz - // 2 - 44.1 kHz ? - // 3 - 48 kHz ? - - // SPUs also support stereo decoding. (data.is_stereo) - - // Check the output buffer - we cannot decode anything else if it's - // unavailable. - if (!data.output_buffer_valid) { - return; - } - - // Translate this for future use. - uint8_t* output_buffer = memory()->TranslatePhysical(data.output_buffer_ptr); - - // Output buffers are in raw PCM samples, 256 bytes per block. - // Output buffer is a ring buffer. We need to write from the write offset - // to the read offset. 
- uint32_t output_capacity = data.output_buffer_block_count * 256; - uint32_t output_read_offset = data.output_buffer_read_offset * 256; - uint32_t output_write_offset = data.output_buffer_write_offset * 256; - - RingBuffer output_rb(output_buffer, output_capacity); - output_rb.set_read_offset(output_read_offset); - output_rb.set_write_offset(output_write_offset); - - size_t output_remaining_bytes = output_rb.write_count(); - - // Decode until we can't write any more data. - while (output_remaining_bytes > 0) { - // This'll copy audio samples into the output buffer. - // The samples need to be 2 bytes long! - // Copies one frame at a time, so keep calling this until size == 0 - int read_bytes = 0; - int decode_attempts_remaining = 3; - - uint8_t work_buffer[XMAContextData::kOutputMaxSizeBytes]; - while (decode_attempts_remaining) { - read_bytes = context.decoder->DecodePacket(work_buffer, 0, - output_remaining_bytes); - if (read_bytes >= 0) { - //assert_true((read_bytes % 256) == 0); - auto written_bytes = output_rb.Write(work_buffer, read_bytes); - assert_true(read_bytes == written_bytes); - - // Ok. - break; - } else { - // Sometimes the decoder will fail on a packet. I think it's - // looking for cross-packet frames and failing. If you run it again - // on the same packet it'll work though. - --decode_attempts_remaining; - } - } - - if (!decode_attempts_remaining) { - XELOGAPU("AudioSystem: libav failed to decode packet (returned %.8X)", -read_bytes); - - // Failed out. - if (data.input_buffer_0_valid || data.input_buffer_1_valid) { - // There's new data available - maybe we'll be ok if we decode it? - read_bytes = 0; - context.decoder->DiscardPacket(); - } else { - // No data and hosed - bail. - break; - } - } - - data.output_buffer_write_offset = output_rb.write_offset() / 256; - output_remaining_bytes -= read_bytes; - - // If we need more data and the input buffers have it, grab it. - if (read_bytes) { - // Haven't finished with current packet. 
- continue; - } else if (data.input_buffer_0_valid || data.input_buffer_1_valid) { - // Done with previous packet, so grab a new one. - int ret = PrepareXMAPacket(context, data); - if (ret <= 0) { - // No more data (but may have prepared a packet) - data.input_buffer_0_valid = 0; - data.input_buffer_1_valid = 0; - } - } else { - // Decoder is out of data and there's no more to give. - break; - } - } - - // The game will kick us again with a new output buffer later. - data.output_buffer_valid = 0; -} - -int AudioSystem::PrepareXMAPacket(XMAContext &context, XMAContextData &data) { - // Translate pointers for future use. - uint8_t* in0 = data.input_buffer_0_valid - ? memory()->TranslatePhysical(data.input_buffer_0_ptr) - : nullptr; - uint8_t* in1 = data.input_buffer_1_valid - ? memory()->TranslatePhysical(data.input_buffer_1_ptr) - : nullptr; - - int sample_rate = 0; - if (data.sample_rate == 0) { - sample_rate = 24000; - } else if (data.sample_rate == 1) { - sample_rate = 32000; - } else if (data.sample_rate == 2) { - sample_rate = 44100; - } else if (data.sample_rate == 3) { - sample_rate = 48000; - } - int channels = data.is_stereo ? 2 : 1; - - // See if we've finished with the input. - // Block count is in packets, so expand by packet size. - uint32_t input_size_0_bytes = (data.input_buffer_0_packet_count) * 2048; - uint32_t input_size_1_bytes = (data.input_buffer_1_packet_count) * 2048; - - // Total input size - uint32_t input_size_bytes = input_size_0_bytes + input_size_1_bytes; - - // Input read offset is in bits. Typically starts at 32 (4 bytes). - // "Sequence" offset - used internally for WMA Pro decoder. - // Just the read offset. - uint32_t seq_offset_bytes = (data.input_buffer_read_offset & ~0x7FF) / 8; - uint32_t input_remaining_bytes = input_size_bytes - seq_offset_bytes; - - if (seq_offset_bytes < input_size_bytes) { - // Setup input offset and input buffer. 
- uint32_t input_offset_bytes = seq_offset_bytes; - auto input_buffer = in0; - - if (seq_offset_bytes >= input_size_0_bytes) { - // Size overlap, select input buffer 1. - // TODO: This needs testing. - input_offset_bytes -= input_size_0_bytes; - input_buffer = in1; - } - - // Still have data to read. - auto packet = input_buffer + input_offset_bytes; - assert_true(input_offset_bytes % 2048 == 0); - context.decoder->PreparePacket(packet, seq_offset_bytes, - XMAContextData::kBytesPerPacket, - sample_rate, channels); - data.input_buffer_read_offset += XMAContextData::kBytesPerPacket * 8; - - input_remaining_bytes -= XMAContextData::kBytesPerPacket; - if (input_remaining_bytes <= 0) { - // Used the last of the data but prepared a packet - return 0; - } - } else { - // No more data available and no packet prepared. - return -1; - } - - return input_remaining_bytes; -} - -// free60 may be useful here, however it looks like it's using a different -// piece of hardware: -// https://github.com/Free60Project/libxenon/blob/master/libxenon/drivers/xenon_sound/sound.c - -uint64_t AudioSystem::ReadRegister(uint32_t addr) { - uint32_t r = addr & 0xFFFF; - XELOGAPU("ReadRegister(%.4X)", r); - // 1800h is read on startup and stored -- context? buffers? - // 1818h is read during a lock? - - assert_true(r % 4 == 0); - uint32_t value = register_file_[r / 4]; - - // 1818 is rotating context processing # set to hardware ID of context being - // processed. - // If bit 200h is set, the locking code will possibly collide on hardware IDs - // and error out, so we should never set it (I think?). 
- if (r == 0x1818) { - // To prevent games from seeing a stuck XMA context, return a rotating - // number - registers_.current_context = registers_.next_context; - registers_.next_context = (registers_.next_context + 1) % kXmaContextCount; - value = registers_.current_context; - } - - value = xe::byte_swap(value); - return value; -} - -void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) { - SCOPE_profile_cpu_f("apu"); - - uint32_t r = addr & 0xFFFF; - value = xe::byte_swap(uint32_t(value)); - XELOGAPU("WriteRegister(%.4X, %.8X)", r, value); - // 1804h is written to with 0x02000000 and 0x03000000 around a lock operation - - assert_true(r % 4 == 0); - register_file_[r / 4] = uint32_t(value); - - if (r >= 0x1940 && r <= 0x1940 + 9 * 4) { - // Context kick command. - // This will kick off the given hardware contexts. - // Basically, this kicks the SPU and says "hey, decode that audio!" - // XMAEnableContext - - // The context ID is a bit in the range of the entire context array. - for (int i = 0; value && i < 32; ++i) { - if (value & 1) { - uint32_t context_id = i + (r - 0x1940) / 4 * 32; - XMAContext& context = xma_context_array_[context_id]; - - context.lock.lock(); - auto context_ptr = memory()->TranslateVirtual(context.guest_ptr); - XMAContextData data(context_ptr); - - XELOGAPU("AudioSystem: kicking context %d (%d/%d bytes)", context_id, - (data.input_buffer_read_offset & ~0x7FF) / 8, - (data.input_buffer_0_packet_count + data.input_buffer_1_packet_count) - * XMAContextData::kBytesPerPacket); - - // Reset valid flags so our audio decoder knows to process this one. - data.input_buffer_0_valid = data.input_buffer_0_ptr != 0; - data.input_buffer_1_valid = data.input_buffer_1_ptr != 0; - - data.Store(context_ptr); - - context.kicked = true; - context.lock.unlock(); - } - value >>= 1; - } - - // Signal the decoder thread to start processing. - decoder_fence_.Signal(); - } else if (r >= 0x1A40 && r <= 0x1A40 + 9 * 4) { - // Context lock command. 
- // This requests a lock by flagging the context. - // XMADisableContext - for (int i = 0; value && i < 32; ++i) { - if (value & 1) { - uint32_t context_id = i + (r - 0x1A40) / 4 * 32; - XELOGAPU("AudioSystem: set context lock %d", context_id); - } - value >>= 1; - } - - // Signal the decoder thread to start processing. - decoder_fence_.Signal(); - } else if (r >= 0x1A80 && r <= 0x1A80 + 9 * 4) { - // Context clear command. - // This will reset the given hardware contexts. - for (int i = 0; value && i < 32; ++i) { - if (value & 1) { - uint32_t context_id = i + (r - 0x1A80) / 4 * 32; - XMAContext& context = xma_context_array_[context_id]; - XELOGAPU("AudioSystem: reset context %d", context_id); - - context.lock.lock(); - auto context_ptr = memory()->TranslateVirtual(context.guest_ptr); - XMAContextData data(context_ptr); - - context.decoder->DiscardPacket(); - data.input_buffer_0_valid = 0; - data.input_buffer_1_valid = 0; - data.output_buffer_valid = 0; - - data.output_buffer_read_offset = 0; - data.output_buffer_write_offset = 0; - - data.Store(context_ptr); - context.lock.unlock(); - } - value >>= 1; - } - } else { - value = value; - } -} - } // namespace apu } // namespace xe diff --git a/src/xenia/apu/audio_system.h b/src/xenia/apu/audio_system.h index 287d7b0a1..55f9016f4 100644 --- a/src/xenia/apu/audio_system.h +++ b/src/xenia/apu/audio_system.h @@ -27,99 +27,8 @@ namespace xe { namespace apu { class AudioDriver; -class AudioDecoder; - -// This is stored in guest space in big-endian order. -// We load and swap the whole thing to splat here so that we can -// use bitfields. 
-// This could be important: -// http://www.fmod.org/questions/question/forum-15859 -// Appears to be dumped in order (for the most part) - -// http://pastebin.com/9amqJ2kQ -struct XMAContextData { - static const uint32_t kBytesPerPacket = 2048; - static const uint32_t kSamplesPerFrame = 512; - static const uint32_t kSamplesPerSubframe = 128; - - static const uint32_t kOutputBytesPerBlock = 256; - static const uint32_t kOutputMaxSizeBytes = 31 * kOutputBytesPerBlock; - - // DWORD 0 - uint32_t input_buffer_0_packet_count : 12; // XMASetInputBuffer0, number of - // 2KB packets. Max 4095 packets. - // These packets form a block. - uint32_t loop_count : 8; // +12bit, XMASetLoopData NumLoops - uint32_t input_buffer_0_valid : 1; // +20bit, XMAIsInputBuffer0Valid - uint32_t input_buffer_1_valid : 1; // +21bit, XMAIsInputBuffer1Valid - uint32_t output_buffer_block_count : 5; // +22bit SizeWrite 256byte blocks - uint32_t - output_buffer_write_offset : 5; // +27bit, XMAGetOutputBufferWriteOffset - // AKA OffsetWrite - - // DWORD 1 - uint32_t input_buffer_1_packet_count : 12; // XMASetInputBuffer1, number of - // 2KB packets. Max 4095 packets. - // These packets form a block. - uint32_t loop_subframe_end : 2; // +12bit, XMASetLoopData - uint32_t unk_dword_1_a : 3; // ? might be loop_subframe_skip - uint32_t loop_subframe_skip : 3; // +17bit, XMASetLoopData might be - // subframe_decode_count - uint32_t subframe_decode_count : 4; // +20bit might be subframe_skip_count - uint32_t unk_dword_1_b : 3; // ? NumSubframesToSkip/NumChannels(?) - uint32_t sample_rate : 2; // +27bit enum of sample rates - uint32_t is_stereo : 1; // +29bit - uint32_t unk_dword_1_c : 1; // +30bit - uint32_t output_buffer_valid : 1; // +31bit, XMAIsOutputBufferValid - - // DWORD 2 - uint32_t input_buffer_read_offset : 26; // XMAGetInputBufferReadOffset - uint32_t unk_dword_2 : 6; // ErrorStatus/ErrorSet (?) 
- - // DWORD 3 - uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset - uint32_t unk_dword_3 : 6; // ? ParserErrorStatus/ParserErrorSet(?) - - // DWORD 4 - uint32_t loop_end : 26; // XMASetLoopData LoopEndOffset - uint32_t packet_metadata : 5; // XMAGetPacketMetadata - uint32_t current_buffer : 1; // ? - - // DWORD 5 - uint32_t input_buffer_0_ptr; // physical address - // DWORD 6 - uint32_t input_buffer_1_ptr; // physical address - // DWORD 7 - uint32_t output_buffer_ptr; // physical address - // DWORD 8 - uint32_t overlap_add_ptr; // PtrOverlapAdd(?) - - // DWORD 9 - // +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead - uint32_t output_buffer_read_offset : 5; - uint32_t unk_dword_9 : 27; // StopWhenDone/InterruptWhenDone(?) - - // DWORD 10-15 - uint32_t unk_dwords_10_15[6]; // reserved? - - XMAContextData(const void* ptr) { - xe::copy_and_swap_32_aligned(reinterpret_cast<uint32_t*>(this), - reinterpret_cast<const uint32_t*>(ptr), - sizeof(XMAContextData) / 4); - } - - void Store(void* ptr) { - xe::copy_and_swap_32_aligned(reinterpret_cast<uint32_t*>(ptr), - reinterpret_cast<const uint32_t*>(this), - sizeof(XMAContextData) / 4); - } -}; -static_assert_size(XMAContextData, 64); class AudioSystem { - protected: - struct XMAContext; - public: virtual ~AudioSystem(); @@ -130,13 +39,6 @@ class AudioSystem { virtual X_STATUS Setup(); virtual void Shutdown(); - uint32_t xma_context_array_ptr() const { - return registers_.xma_context_array_ptr; - } - uint32_t AllocateXmaContext(); - void ReleaseXmaContext(uint32_t guest_ptr); - bool BlockOnXmaContext(uint32_t guest_ptr, bool poll); - X_STATUS RegisterClient(uint32_t callback, uint32_t callback_arg, size_t* out_index); void UnregisterClient(size_t index); @@ -146,9 +48,6 @@ class AudioSystem { AudioDriver** out_driver) = 0; virtual void DestroyDriver(AudioDriver* driver) = 0; - virtual uint64_t ReadRegister(uint32_t addr); - virtual void WriteRegister(uint32_t addr, uint64_t value); - // TODO(gibbed): respect XAUDIO2_MAX_QUEUED_BUFFERS somehow (ie min(64, 
XAUDIO2_MAX_QUEUED_BUFFERS)) static const size_t kMaximumQueuedFrames = 64; @@ -157,19 +56,6 @@ class AudioSystem { private: void WorkerThreadMain(); - void DecoderThreadMain(); - - void ProcessXmaContext(XMAContext& context, XMAContextData& data); - int PrepareXMAPacket(XMAContext& context, XMAContextData& data); - - static uint64_t MMIOReadRegisterThunk(void* ppc_context, AudioSystem* as, - uint32_t addr) { - return as->ReadRegister(addr); - } - static void MMIOWriteRegisterThunk(void* ppc_context, AudioSystem* as, - uint32_t addr, uint64_t value) { - as->WriteRegister(addr, value); - } protected: AudioSystem(Emulator* emulator); @@ -181,47 +67,8 @@ class AudioSystem { std::atomic worker_running_; kernel::object_ref worker_thread_; - std::atomic decoder_running_; - kernel::object_ref decoder_thread_; - xe::threading::Fence decoder_fence_; - xe::mutex lock_; - // Stored little endian, accessed through 0x7FEA.... - union { - struct { - union { - struct { - uint8_t ignored0[0x1800]; - // 1800h; points to guest-space physical block of 320 contexts. - uint32_t xma_context_array_ptr; - }; - struct { - uint8_t ignored1[0x1818]; - // 1818h; current context ID. - uint32_t current_context; - // 181Ch; next context ID to process. - uint32_t next_context; - }; - }; - } registers_; - uint32_t register_file_[0xFFFF / 4]; - }; - - struct XMAContext { - uint32_t guest_ptr; - xe::mutex lock; - bool in_use; - bool kicked; - - AudioDecoder* decoder; - }; - - static const uint32_t kXmaContextCount = 320; // // Total number of XMA contexts available. 
- XMAContext xma_context_array_[kXmaContextCount]; - std::vector xma_context_free_list_; - std::vector xma_context_used_list_; // XMA contexts in use - static const size_t kMaximumClientCount = 8; struct { diff --git a/src/xenia/apu/audio_decoder.cc b/src/xenia/apu/xma_context.cc similarity index 90% rename from src/xenia/apu/audio_decoder.cc rename to src/xenia/apu/xma_context.cc index 40bd0dd7f..4136ceacc 100644 --- a/src/xenia/apu/audio_decoder.cc +++ b/src/xenia/apu/xma_context.cc @@ -7,13 +7,12 @@ ****************************************************************************** */ -#include "xenia/apu/audio_decoder.h" +#include "xenia/apu/xma_context.h" +#include "xenia/apu/xma_decoder.h" +#include "xenia/base/logging.h" #include -#include "xenia/apu/audio_system.h" -#include "xenia/base/logging.h" - extern "C" { #include "libavcodec/avcodec.h" } @@ -24,13 +23,13 @@ extern "C" { namespace xe { namespace apu { -AudioDecoder::AudioDecoder() +XmaContext::XmaContext() : codec_(nullptr), context_(nullptr), decoded_frame_(nullptr), packet_(nullptr) {} -AudioDecoder::~AudioDecoder() { +XmaContext::~XmaContext() { if (context_) { if (context_->extradata) { delete context_->extradata; @@ -48,7 +47,7 @@ AudioDecoder::~AudioDecoder() { } } -int AudioDecoder::Initialize() { +int XmaContext::Initialize() { static bool avcodec_initialized = false; if (!avcodec_initialized) { avcodec_register_all(); @@ -77,7 +76,7 @@ int AudioDecoder::Initialize() { // Initialize these to 0. They'll actually be set later. 
context_->channels = 0; context_->sample_rate = 0; - context_->block_align = XMAContextData::kBytesPerPacket; + context_->block_align = XMA_CONTEXT_DATA::kBytesPerPacket; // Extra data passed to the decoder context_->extradata_size = 18; @@ -86,7 +85,7 @@ int AudioDecoder::Initialize() { // Current frame stuff whatever // samples per frame * 2 max channels * output bytes current_frame_ = - new uint8_t[XMAContextData::kSamplesPerFrame * 2 * 2]; + new uint8_t[XMA_CONTEXT_DATA::kSamplesPerFrame * 2 * 2]; current_frame_pos_ = 0; frame_samples_size_ = 0; @@ -99,9 +98,9 @@ int AudioDecoder::Initialize() { return 0; } -int AudioDecoder::PreparePacket(uint8_t *input, size_t seq_offset, size_t size, +int XmaContext::PreparePacket(uint8_t *input, size_t seq_offset, size_t size, int sample_rate, int channels) { - if (size != XMAContextData::kBytesPerPacket) { + if (size != XMA_CONTEXT_DATA::kBytesPerPacket) { // Invalid packet size! assert_always(); return 1; @@ -118,7 +117,7 @@ int AudioDecoder::PreparePacket(uint8_t *input, size_t seq_offset, size_t size, (*((int *)packet_data_) & 0xFFFEFF08); packet_->data = packet_data_; - packet_->size = XMAContextData::kBytesPerPacket; + packet_->size = XMA_CONTEXT_DATA::kBytesPerPacket; // Re-initialize the context with new sample rate and channels if (context_->sample_rate != sample_rate || context_->channels != channels) { @@ -137,7 +136,7 @@ int AudioDecoder::PreparePacket(uint8_t *input, size_t seq_offset, size_t size, return 0; } -void AudioDecoder::DiscardPacket() { +void XmaContext::DiscardPacket() { if (packet_->size > 0 || current_frame_pos_ != frame_samples_size_) { packet_->data = 0; packet_->size = 0; @@ -145,7 +144,7 @@ void AudioDecoder::DiscardPacket() { } } -int AudioDecoder::DecodePacket(uint8_t *output, size_t output_offset, +int XmaContext::DecodePacket(uint8_t *output, size_t output_offset, size_t output_size) { size_t to_copy = 0; size_t original_offset = output_offset; @@ -180,7 +179,7 @@ int 
AudioDecoder::DecodePacket(uint8_t *output, size_t output_offset, // Successfully decoded a frame if (got_frame) { // Validity checks. - if (decoded_frame_->nb_samples > XMAContextData::kSamplesPerFrame) { + if (decoded_frame_->nb_samples > XMA_CONTEXT_DATA::kSamplesPerFrame) { return -2; } else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) { return -3; diff --git a/src/xenia/apu/xma_context.h b/src/xenia/apu/xma_context.h new file mode 100644 index 000000000..28eb3f35a --- /dev/null +++ b/src/xenia/apu/xma_context.h @@ -0,0 +1,169 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_APU_XMA_CONTEXT_H_ +#define XENIA_APU_XMA_CONTEXT_H_ + +#include +#include +#include + +#include "xenia/emulator.h" +#include "xenia/xbox.h" + +// XMA audio format: +// From research, XMA appears to be based on WMA Pro with +// a few (very slight) modifications. +// XMA2 is fully backwards-compatible with XMA1. + +// Helpful resources: +// https://github.com/koolkdev/libertyv/blob/master/libav_wrapper/xma2dec.c +// http://hcs64.com/mboard/forum.php?showthread=14818 +// https://github.com/hrydgard/minidx9/blob/master/Include/xma2defs.h + +// Forward declarations +struct AVCodec; +struct AVCodecContext; +struct AVFrame; +struct AVPacket; + +namespace xe { +namespace apu { + +// This is stored in guest space in big-endian order. +// We load and swap the whole thing to splat here so that we can +// use bitfields. 
+// This could be important: +// http://www.fmod.org/questions/question/forum-15859 +// Appears to be dumped in order (for the most part) + +struct XMA_CONTEXT_DATA { + static const uint32_t kBytesPerPacket = 2048; + static const uint32_t kSamplesPerFrame = 512; + static const uint32_t kSamplesPerSubframe = 128; + + static const uint32_t kOutputBytesPerBlock = 256; + static const uint32_t kOutputMaxSizeBytes = 31 * kOutputBytesPerBlock; + + // DWORD 0 + uint32_t input_buffer_0_packet_count : 12; // XMASetInputBuffer0, number of + // 2KB packets. Max 4095 packets. + // These packets form a block. + uint32_t loop_count : 8; // +12bit, XMASetLoopData NumLoops + uint32_t input_buffer_0_valid : 1; // +20bit, XMAIsInputBuffer0Valid + uint32_t input_buffer_1_valid : 1; // +21bit, XMAIsInputBuffer1Valid + uint32_t output_buffer_block_count : 5; // +22bit SizeWrite 256byte blocks + uint32_t output_buffer_write_offset : 5; // +27bit + // XMAGetOutputBufferWriteOffset + // AKA OffsetWrite + + // DWORD 1 + uint32_t input_buffer_1_packet_count : 12; // XMASetInputBuffer1, number of + // 2KB packets. Max 4095 packets. + // These packets form a block. + uint32_t loop_subframe_end : 2; // +12bit, XMASetLoopData + uint32_t unk_dword_1_a : 3; // ? might be loop_subframe_skip + uint32_t loop_subframe_skip : 3; // +17bit, XMASetLoopData might be + // subframe_decode_count + uint32_t subframe_decode_count : 4; // +20bit might be subframe_skip_count + uint32_t unk_dword_1_b : 3; // ? NumSubframesToSkip/NumChannels(?) + uint32_t sample_rate : 2; // +27bit enum of sample rates + uint32_t is_stereo : 1; // +29bit + uint32_t unk_dword_1_c : 1; // +30bit + uint32_t output_buffer_valid : 1; // +31bit, XMAIsOutputBufferValid + + // DWORD 2 + uint32_t input_buffer_read_offset : 26; // XMAGetInputBufferReadOffset + uint32_t unk_dword_2 : 6; // ErrorStatus/ErrorSet (?) + + // DWORD 3 + uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset + uint32_t unk_dword_3 : 6; // ? 
ParserErrorStatus/ParserErrorSet(?) + + // DWORD 4 + uint32_t loop_end : 26; // XMASetLoopData LoopEndOffset + uint32_t packet_metadata : 5; // XMAGetPacketMetadata + uint32_t current_buffer : 1; // ? + + // DWORD 5 + uint32_t input_buffer_0_ptr; // physical address + // DWORD 6 + uint32_t input_buffer_1_ptr; // physical address + // DWORD 7 + uint32_t output_buffer_ptr; // physical address + // DWORD 8 + uint32_t overlap_add_ptr; // PtrOverlapAdd(?) + + // DWORD 9 + // +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead + uint32_t output_buffer_read_offset : 5; + uint32_t unk_dword_9 : 27; // StopWhenDone/InterruptWhenDone(?) + + // DWORD 10-15 + uint32_t unk_dwords_10_15[6]; // reserved? + + XMA_CONTEXT_DATA(const void* ptr) { + xe::copy_and_swap(reinterpret_cast(this), + reinterpret_cast(ptr), + sizeof(XMA_CONTEXT_DATA) / 4); + } + + void Store(void* ptr) { + xe::copy_and_swap(reinterpret_cast(ptr), + reinterpret_cast(this), + sizeof(XMA_CONTEXT_DATA) / 4); + } +}; +static_assert_size(XMA_CONTEXT_DATA, 64); + +class XmaContext { + public: + XmaContext(); + ~XmaContext(); + + int Initialize(); + + int PreparePacket(uint8_t* input, size_t seq_offset, size_t size, + int sample_rate, int channels); + void DiscardPacket(); + + int DecodePacket(uint8_t* output, size_t offset, size_t size); + + uint32_t guest_ptr() { return guest_ptr_; } + xe::mutex& lock() { return lock_; } // TODO(gibbed): remove this + bool in_use() { return in_use_; } + bool kicked() { return kicked_; } + + void set_guest_ptr(uint32_t guest_ptr) { guest_ptr_ = guest_ptr; } + void set_in_use(bool in_use) { in_use_ = in_use; } + void set_kicked(bool kicked) { kicked_ = kicked; } + + private: + uint32_t guest_ptr_; + xe::mutex lock_; + bool in_use_; + bool kicked_; + + // libav structures + AVCodec* codec_; + AVCodecContext* context_; + AVFrame* decoded_frame_; + AVPacket* packet_; + + size_t current_frame_pos_; + uint8_t* current_frame_; + uint32_t frame_samples_size_; + + uint8_t 
packet_data_[XMA_CONTEXT_DATA::kBytesPerPacket]; +}; + +} // namespace apu +} // namespace xe + +#endif // XENIA_APU_XMA_CONTEXT_H_ diff --git a/src/xenia/apu/xma_decoder.cc b/src/xenia/apu/xma_decoder.cc new file mode 100644 index 000000000..e18a4bb51 --- /dev/null +++ b/src/xenia/apu/xma_decoder.cc @@ -0,0 +1,496 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/apu/audio_system.h" + +#include "xenia/apu/xma_context.h" +#include "xenia/apu/xma_decoder.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/ring_buffer.h" +#include "xenia/base/string_buffer.h" +#include "xenia/cpu/processor.h" +#include "xenia/cpu/thread_state.h" +#include "xenia/emulator.h" +#include "xenia/kernel/objects/xthread.h" +#include "xenia/profiling.h" + +extern "C" { +#include "libavutil/log.h" +} + +// As with normal Microsoft, there are like twelve different ways to access +// the audio APIs. Early games use XMA*() methods almost exclusively to touch +// decoders. Later games use XAudio*() and direct memory writes to the XMA +// structures (as opposed to the XMA* calls), meaning that we have to support +// both. +// +// For ease of implementation, most audio related processing is handled in +// AudioSystem, and the functions here call off to it. +// The XMA*() functions just manipulate the audio system in the guest context +// and let the normal AudioSystem handling take it, to prevent duplicate +// implementations. 
They can be found in xboxkrnl_audio_xma.cc +// +// XMA details: +// https://devel.nuclex.org/external/svn/directx/trunk/include/xma2defs.h +// https://github.com/gdawg/fsbext/blob/master/src/xma_header.h +// +// XAudio2 uses XMA under the covers, and seems to map with the same +// restrictions of frame/subframe/etc: +// https://msdn.microsoft.com/en-us/library/windows/desktop/microsoft.directx_sdk.xaudio2.xaudio2_buffer(v=vs.85).aspx +// +// XMA contexts are 64b in size and tight bitfields. They are in physical +// memory not usually available to games. Games will use MmMapIoSpace to get +// the 64b pointer in user memory so they can party on it. If the game doesn't +// do this, it's likely they are either passing the context to XAudio or +// using the XMA* functions. + +namespace xe { +namespace apu { + +using namespace xe::cpu; + +XmaDecoder::XmaDecoder(Emulator* emulator) + : emulator_(emulator), + memory_(emulator->memory()), + worker_running_(false) { +} + +XmaDecoder::~XmaDecoder() { +} + +void av_log_callback(void *avcl, int level, const char *fmt, va_list va) { + StringBuffer buff; + buff.AppendVarargs(fmt, va); + + xe::log_line('i', "libav: %s", buff.GetString()); +} + +X_STATUS XmaDecoder::Setup() { + processor_ = emulator_->processor(); + + // Setup libav logging callback + av_log_set_callback(av_log_callback); + + // Let the processor know we want register access callbacks. + emulator_->memory()->AddVirtualMappedRange( + 0x7FEA0000, 0xFFFF0000, 0x0000FFFF, this, + reinterpret_cast(MMIOReadRegisterThunk), + reinterpret_cast(MMIOWriteRegisterThunk)); + + // Setup XMA contexts ptr. + registers_.context_array_ptr = memory()->SystemHeapAlloc( + sizeof(XMA_CONTEXT_DATA) * kContextCount, 256, kSystemHeapPhysical); + // Add all contexts to the free list. 
+ for (int i = kContextCount - 1; i >= 0; --i) { + uint32_t ptr = registers_.context_array_ptr + i * sizeof(XMA_CONTEXT_DATA); + XmaContext& context = context_array_[i]; + context.set_guest_ptr(ptr); + context.Initialize(); + } + registers_.next_context = 1; + + worker_running_ = true; + worker_thread_ = + kernel::object_ref(new kernel::XHostThread( + emulator()->kernel_state(), 128 * 1024, 0, [this]() { + WorkerThreadMain(); + return 0; + })); + worker_thread_->set_name("XMA Decoder"); + worker_thread_->Create(); + + return X_STATUS_SUCCESS; +} + +void XmaDecoder::WorkerThreadMain() { + while (worker_running_) { + // Okay, let's loop through XMA contexts to find ones we need to decode! + for (uint32_t n = 0; n < kContextCount; n++) { + XmaContext& context = context_array_[n]; + if (context.in_use() && context.kicked()) { + context.lock().lock(); + context.set_kicked(false); + + auto context_ptr = memory()->TranslateVirtual(context.guest_ptr()); + XMA_CONTEXT_DATA data(context_ptr); + ProcessContext(context, data); + data.Store(context_ptr); + + context.lock().unlock(); + } + } + } +} + +void XmaDecoder::Initialize() {} + +void XmaDecoder::Shutdown() { + worker_running_ = false; + worker_fence_.Signal(); + worker_thread_.reset(); + + memory()->SystemHeapFree(registers_.context_array_ptr); +} + +uint32_t XmaDecoder::AllocateContext() { + std::lock_guard lock(lock_); + + for (uint32_t n = 0; n < kContextCount; n++) { + XmaContext& context = context_array_[n]; + if (!context.in_use()) { + context.set_in_use(true); + return context.guest_ptr(); + } + } + + return 0; +} + +void XmaDecoder::ReleaseContext(uint32_t guest_ptr) { + std::lock_guard lock(lock_); + + // Find it in the list. + for (uint32_t n = 0; n < kContextCount; n++) { + XmaContext& context = context_array_[n]; + if (context.guest_ptr() == guest_ptr) { + // Found it! 
+ // Lock it in case the decoder thread is working on it now + context.lock().lock(); + + context.set_in_use(false); + auto context_ptr = memory()->TranslateVirtual(guest_ptr); + std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA)); // Zero it. + context.DiscardPacket(); + + context.lock().unlock(); + break; + } + } +} + +bool XmaDecoder::BlockOnContext(uint32_t guest_ptr, bool poll) { + std::lock_guard lock(lock_); + for (uint32_t n = 0; n < kContextCount; n++) { + XmaContext& context = context_array_[n]; + if (context.guest_ptr() == guest_ptr) { + if (!context.lock().try_lock()) { + if (poll) { + return false; + } + context.lock().lock(); + } + context.lock().unlock(); + return true; + } + } + return true; +} + +void XmaDecoder::ProcessContext(XmaContext& context, XMA_CONTEXT_DATA& data) { + SCOPE_profile_cpu_f("apu"); + + // What I see: + // XMA outputs 2 bytes per sample + // 512 samples per frame (128 per subframe) + // Max output size is data.output_buffer_block_count * 256 + + // This decoder is fed packets (max 4095 per buffer) + // Packets contain "some" frames + // 32bit header (big endian) + + // Frames are the smallest thing the SPUs can decode. + // They usually can span packets (libav handles this) + + // Sample rates (data.sample_rate): + // 0 - 24 kHz ? + // 1 - 32 kHz + // 2 - 44.1 kHz ? + // 3 - 48 kHz ? + + // SPUs also support stereo decoding. (data.is_stereo) + + // Check the output buffer - we cannot decode anything else if it's + // unavailable. + if (!data.output_buffer_valid) { + return; + } + + // Translate this for future use. + uint8_t* output_buffer = memory()->TranslatePhysical(data.output_buffer_ptr); + + // Output buffers are in raw PCM samples, 256 bytes per block. + // Output buffer is a ring buffer. We need to write from the write offset + // to the read offset. 
+ uint32_t output_capacity = data.output_buffer_block_count * 256; + uint32_t output_read_offset = data.output_buffer_read_offset * 256; + uint32_t output_write_offset = data.output_buffer_write_offset * 256; + + RingBuffer output_rb(output_buffer, output_capacity); + output_rb.set_read_offset(output_read_offset); + output_rb.set_write_offset(output_write_offset); + + size_t output_remaining_bytes = output_rb.write_count(); + + // Decode until we can't write any more data. + while (output_remaining_bytes > 0) { + // This'll copy audio samples into the output buffer. + // The samples need to be 2 bytes long! + // Copies one frame at a time, so keep calling this until size == 0 + int read_bytes = 0; + int decode_attempts_remaining = 3; + + uint8_t work_buffer[XMA_CONTEXT_DATA::kOutputMaxSizeBytes]; + while (decode_attempts_remaining) { + read_bytes = context.DecodePacket(work_buffer, 0, + output_remaining_bytes); + if (read_bytes >= 0) { + //assert_true((read_bytes % 256) == 0); + auto written_bytes = output_rb.Write(work_buffer, read_bytes); + assert_true(read_bytes == written_bytes); + + // Ok. + break; + } else { + // Sometimes the decoder will fail on a packet. I think it's + // looking for cross-packet frames and failing. If you run it again + // on the same packet it'll work though. + --decode_attempts_remaining; + } + } + + if (!decode_attempts_remaining) { + XELOGAPU("AudioSystem: libav failed to decode packet (returned %.8X)", -read_bytes); + + // Failed out. + if (data.input_buffer_0_valid || data.input_buffer_1_valid) { + // There's new data available - maybe we'll be ok if we decode it? + read_bytes = 0; + context.DiscardPacket(); + } else { + // No data and hosed - bail. + break; + } + } + + data.output_buffer_write_offset = output_rb.write_offset() / 256; + output_remaining_bytes -= read_bytes; + + // If we need more data and the input buffers have it, grab it. + if (read_bytes) { + // Haven't finished with current packet. 
+ continue; + } else if (data.input_buffer_0_valid || data.input_buffer_1_valid) { + // Done with previous packet, so grab a new one. + int ret = PreparePacket(context, data); + if (ret <= 0) { + // No more data (but may have prepared a packet) + data.input_buffer_0_valid = 0; + data.input_buffer_1_valid = 0; + } + } else { + // Decoder is out of data and there's no more to give. + break; + } + } + + // The game will kick us again with a new output buffer later. + data.output_buffer_valid = 0; +} + +int XmaDecoder::PreparePacket(XmaContext &context, XMA_CONTEXT_DATA &data) { + // Translate pointers for future use. + uint8_t* in0 = data.input_buffer_0_valid + ? memory()->TranslatePhysical(data.input_buffer_0_ptr) + : nullptr; + uint8_t* in1 = data.input_buffer_1_valid + ? memory()->TranslatePhysical(data.input_buffer_1_ptr) + : nullptr; + + int sample_rate = 0; + if (data.sample_rate == 0) { + sample_rate = 24000; + } else if (data.sample_rate == 1) { + sample_rate = 32000; + } else if (data.sample_rate == 2) { + sample_rate = 44100; + } else if (data.sample_rate == 3) { + sample_rate = 48000; + } + int channels = data.is_stereo ? 2 : 1; + + // See if we've finished with the input. + // Block count is in packets, so expand by packet size. + uint32_t input_size_0_bytes = (data.input_buffer_0_packet_count) * 2048; + uint32_t input_size_1_bytes = (data.input_buffer_1_packet_count) * 2048; + + // Total input size + uint32_t input_size_bytes = input_size_0_bytes + input_size_1_bytes; + + // Input read offset is in bits. Typically starts at 32 (4 bytes). + // "Sequence" offset - used internally for WMA Pro decoder. + // Just the read offset. + uint32_t seq_offset_bytes = (data.input_buffer_read_offset & ~0x7FF) / 8; + uint32_t input_remaining_bytes = input_size_bytes - seq_offset_bytes; + + if (seq_offset_bytes < input_size_bytes) { + // Setup input offset and input buffer. 
+ uint32_t input_offset_bytes = seq_offset_bytes; + auto input_buffer = in0; + + if (seq_offset_bytes >= input_size_0_bytes) { + // Size overlap, select input buffer 1. + // TODO: This needs testing. + input_offset_bytes -= input_size_0_bytes; + input_buffer = in1; + } + + // Still have data to read. + auto packet = input_buffer + input_offset_bytes; + assert_true(input_offset_bytes % 2048 == 0); + context.PreparePacket(packet, seq_offset_bytes, + XMA_CONTEXT_DATA::kBytesPerPacket, + sample_rate, channels); + data.input_buffer_read_offset += XMA_CONTEXT_DATA::kBytesPerPacket * 8; + + input_remaining_bytes -= XMA_CONTEXT_DATA::kBytesPerPacket; + if (input_remaining_bytes <= 0) { + // Used the last of the data but prepared a packet + return 0; + } + } else { + // No more data available and no packet prepared. + return -1; + } + + return input_remaining_bytes; +} + +// free60 may be useful here, however it looks like it's using a different +// piece of hardware: +// https://github.com/Free60Project/libxenon/blob/master/libxenon/drivers/xenon_sound/sound.c + +uint64_t XmaDecoder::ReadRegister(uint32_t addr) { + uint32_t r = addr & 0xFFFF; + XELOGAPU("ReadRegister(%.4X)", r); + // 1800h is read on startup and stored -- context? buffers? + // 1818h is read during a lock? + + assert_true(r % 4 == 0); + uint32_t value = register_file_[r / 4]; + + // 1818 is rotating context processing # set to hardware ID of context being + // processed. + // If bit 200h is set, the locking code will possibly collide on hardware IDs + // and error out, so we should never set it (I think?). 
+ if (r == 0x1818) { + // To prevent games from seeing a stuck XMA context, return a rotating + // number + registers_.current_context = registers_.next_context; + registers_.next_context = (registers_.next_context + 1) % kContextCount; + value = registers_.current_context; + } + + value = xe::byte_swap(value); + return value; +} + +void XmaDecoder::WriteRegister(uint32_t addr, uint64_t value) { + SCOPE_profile_cpu_f("apu"); + + uint32_t r = addr & 0xFFFF; + value = xe::byte_swap(uint32_t(value)); + XELOGAPU("WriteRegister(%.4X, %.8X)", r, value); + // 1804h is written to with 0x02000000 and 0x03000000 around a lock operation + + assert_true(r % 4 == 0); + register_file_[r / 4] = uint32_t(value); + + if (r >= 0x1940 && r <= 0x1940 + 9 * 4) { + // Context kick command. + // This will kick off the given hardware contexts. + // Basically, this kicks the SPU and says "hey, decode that audio!" + // XMAEnableContext + + // The context ID is a bit in the range of the entire context array. + for (int i = 0; value && i < 32; ++i) { + if (value & 1) { + uint32_t context_id = i + (r - 0x1940) / 4 * 32; + XmaContext& context = context_array_[context_id]; + + context.lock().lock(); + auto context_ptr = memory()->TranslateVirtual(context.guest_ptr()); + XMA_CONTEXT_DATA data(context_ptr); + + XELOGAPU("AudioSystem: kicking context %d (%d/%d bytes)", context_id, + (data.input_buffer_read_offset & ~0x7FF) / 8, + (data.input_buffer_0_packet_count + data.input_buffer_1_packet_count) + * XMA_CONTEXT_DATA::kBytesPerPacket); + + // Reset valid flags so our audio decoder knows to process this one. + data.input_buffer_0_valid = data.input_buffer_0_ptr != 0; + data.input_buffer_1_valid = data.input_buffer_1_ptr != 0; + + data.Store(context_ptr); + + context.set_kicked(true); + context.lock().unlock(); + } + value >>= 1; + } + + // Signal the decoder thread to start processing. + worker_fence_.Signal(); + } else if (r >= 0x1A40 && r <= 0x1A40 + 9 * 4) { + // Context lock command. 
+ // This requests a lock by flagging the context. + // XMADisableContext + for (int i = 0; value && i < 32; ++i) { + if (value & 1) { + uint32_t context_id = i + (r - 0x1A40) / 4 * 32; + XELOGAPU("AudioSystem: set context lock %d", context_id); + } + value >>= 1; + } + + // Signal the decoder thread to start processing. + worker_fence_.Signal(); + } else if (r >= 0x1A80 && r <= 0x1A80 + 9 * 4) { + // Context clear command. + // This will reset the given hardware contexts. + for (int i = 0; value && i < 32; ++i) { + if (value & 1) { + uint32_t context_id = i + (r - 0x1A80) / 4 * 32; + XmaContext& context = context_array_[context_id]; + XELOGAPU("AudioSystem: reset context %d", context_id); + + context.lock().lock(); + auto context_ptr = memory()->TranslateVirtual(context.guest_ptr()); + XMA_CONTEXT_DATA data(context_ptr); + + context.DiscardPacket(); + data.input_buffer_0_valid = 0; + data.input_buffer_1_valid = 0; + data.output_buffer_valid = 0; + + data.output_buffer_read_offset = 0; + data.output_buffer_write_offset = 0; + + data.Store(context_ptr); + context.lock().unlock(); + } + value >>= 1; + } + } else { + value = value; + } +} + +} // namespace apu +} // namespace xe diff --git a/src/xenia/apu/xma_decoder.h b/src/xenia/apu/xma_decoder.h new file mode 100644 index 000000000..8e90f01b2 --- /dev/null +++ b/src/xenia/apu/xma_decoder.h @@ -0,0 +1,114 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_APU_XMA_DECODER_H_ +#define XENIA_APU_XMA_DECODER_H_ + +#include +#include +#include + +#include "xenia/emulator.h" +#include "xenia/xbox.h" +#include "xenia/apu/xma_context.h" + +namespace xe { +namespace kernel { +class XHostThread; +} // namespace kernel +} // namespace xe + +namespace xe { +namespace apu { + +struct XMA_CONTEXT_DATA; + +class XmaDecoder { + public: + XmaDecoder(Emulator* emulator); + virtual ~XmaDecoder(); + + Emulator* emulator() const { return emulator_; } + Memory* memory() const { return memory_; } + cpu::Processor* processor() const { return processor_; } + + virtual X_STATUS Setup(); + virtual void Shutdown(); + + uint32_t context_array_ptr() const { + return registers_.context_array_ptr; + } + + uint32_t AllocateContext(); + void ReleaseContext(uint32_t guest_ptr); + bool BlockOnContext(uint32_t guest_ptr, bool poll); + + virtual uint64_t ReadRegister(uint32_t addr); + virtual void WriteRegister(uint32_t addr, uint64_t value); + + protected: + virtual void Initialize(); + + private: + void WorkerThreadMain(); + + void ProcessContext(XmaContext& context, XMA_CONTEXT_DATA& data); + int PreparePacket(XmaContext& context, XMA_CONTEXT_DATA& data); + + static uint64_t MMIOReadRegisterThunk(void* ppc_context, XmaDecoder* as, + uint32_t addr) { + return as->ReadRegister(addr); + } + static void MMIOWriteRegisterThunk(void* ppc_context, XmaDecoder* as, + uint32_t addr, uint64_t value) { + as->WriteRegister(addr, value); + } + + protected: + Emulator* emulator_; + Memory* memory_; + cpu::Processor* processor_; + + std::atomic worker_running_; + kernel::object_ref worker_thread_; + xe::threading::Fence worker_fence_; + + xe::mutex lock_; + + // Stored little endian, accessed through 0x7FEA.... + union { + struct { + union { + struct { + uint8_t ignored0[0x1800]; + // 1800h; points to guest-space physical block of 320 contexts. 
+ uint32_t context_array_ptr; + }; + struct { + uint8_t ignored1[0x1818]; + // 1818h; current context ID. + uint32_t current_context; + // 181Ch; next context ID to process. + uint32_t next_context; + }; + }; + } registers_; + uint32_t register_file_[0xFFFF / 4]; + }; + + static const uint32_t kContextCount = 320; + XmaContext context_array_[kContextCount]; + std::vector xma_context_free_list_; + std::vector xma_context_used_list_; // XMA contexts in use +}; + +} // namespace apu +} // namespace xe + +#endif // XENIA_APU_XMA_DECODER_H_ diff --git a/src/xenia/emulator.cc b/src/xenia/emulator.cc index 9ed480a2a..d6e06afcc 100644 --- a/src/xenia/emulator.cc +++ b/src/xenia/emulator.cc @@ -49,10 +49,12 @@ Emulator::~Emulator() { // Give the systems time to shutdown before we delete them. graphics_system_->Shutdown(); audio_system_->Shutdown(); + xma_decoder_->Shutdown(); input_system_.reset(); graphics_system_.reset(); audio_system_.reset(); + xma_decoder_.reset(); kernel_state_.reset(); file_system_.reset(); @@ -117,6 +119,8 @@ X_STATUS Emulator::Setup() { return X_STATUS_NOT_IMPLEMENTED; } + xma_decoder_ = std::move(std::make_unique(this)); + // Initialize the GPU. graphics_system_ = std::move(xe::gpu::Create(this)); if (!graphics_system_) { @@ -155,6 +159,11 @@ X_STATUS Emulator::Setup() { return result; } + result = xma_decoder_->Setup(); + if (result) { + return result; + } + // HLE kernel modules. 
kernel_state_->LoadKernelModule(); kernel_state_->LoadKernelModule(); diff --git a/src/xenia/emulator.h b/src/xenia/emulator.h index 2ed3b38b3..58e647e25 100644 --- a/src/xenia/emulator.h +++ b/src/xenia/emulator.h @@ -21,6 +21,7 @@ namespace xe { namespace apu { class AudioSystem; +class XmaDecoder; } // namespace apu namespace cpu { class ExportResolver; @@ -55,6 +56,7 @@ class Emulator { cpu::Processor* processor() const { return processor_.get(); } apu::AudioSystem* audio_system() const { return audio_system_.get(); } + apu::XmaDecoder* xma_decoder() const { return xma_decoder_.get(); } gpu::GraphicsSystem* graphics_system() const { return graphics_system_.get(); } @@ -88,6 +90,7 @@ class Emulator { std::unique_ptr processor_; std::unique_ptr audio_system_; + std::unique_ptr xma_decoder_; std::unique_ptr graphics_system_; std::unique_ptr input_system_; diff --git a/src/xenia/kernel/xboxkrnl_audio_xma.cc b/src/xenia/kernel/xboxkrnl_audio_xma.cc index a620c90ba..576ded893 100644 --- a/src/xenia/kernel/xboxkrnl_audio_xma.cc +++ b/src/xenia/kernel/xboxkrnl_audio_xma.cc @@ -61,8 +61,8 @@ SHIM_CALL XMACreateContext_shim(PPCContext* ppc_context, XELOGD("XMACreateContext(%.8X)", context_out_ptr); - auto audio_system = kernel_state->emulator()->audio_system(); - uint32_t context_ptr = audio_system->AllocateXmaContext(); + auto xma_decoder = kernel_state->emulator()->xma_decoder(); + uint32_t context_ptr = xma_decoder->AllocateContext(); SHIM_SET_MEM_32(context_out_ptr, context_ptr); if (!context_ptr) { SHIM_SET_RETURN_32(X_STATUS_NO_MEMORY); @@ -78,32 +78,32 @@ SHIM_CALL XMAReleaseContext_shim(PPCContext* ppc_context, XELOGD("XMAReleaseContext(%.8X)", context_ptr); - auto audio_system = kernel_state->emulator()->audio_system(); - audio_system->ReleaseXmaContext(context_ptr); + auto xma_decoder = kernel_state->emulator()->xma_decoder(); + xma_decoder->ReleaseContext(context_ptr); SHIM_SET_RETURN_32(0); } void StoreXmaContextIndexedRegister(KernelState* kernel_state, 
uint32_t base_reg, uint32_t context_ptr) { - auto audio_system = kernel_state->emulator()->audio_system(); - uint32_t hw_index = (context_ptr - audio_system->xma_context_array_ptr()) / - sizeof(XMAContextData); + auto xma_decoder = kernel_state->emulator()->xma_decoder(); + uint32_t hw_index = (context_ptr - xma_decoder->context_array_ptr()) / + sizeof(XMA_CONTEXT_DATA); uint32_t reg_num = base_reg + (hw_index >> 5) * 4; uint32_t reg_value = 1 << (hw_index & 0x1F); - audio_system->WriteRegister(reg_num, xe::byte_swap(reg_value)); + xma_decoder->WriteRegister(reg_num, xe::byte_swap(reg_value)); } -struct X_XMA_LOOP_DATA { +struct XMA_LOOP_DATA { xe::be loop_start; xe::be loop_end; xe::be loop_count; xe::be loop_subframe_end; xe::be loop_subframe_skip; }; -static_assert_size(X_XMA_LOOP_DATA, 12); +static_assert_size(XMA_LOOP_DATA, 12); -struct X_XMA_CONTEXT_INIT { +struct XMA_CONTEXT_INIT { xe::be input_buffer_0_ptr; xe::be input_buffer_0_packet_count; xe::be input_buffer_1_ptr; @@ -115,9 +115,9 @@ struct X_XMA_CONTEXT_INIT { xe::be subframe_decode_count; xe::be channel_count; xe::be sample_rate; - X_XMA_LOOP_DATA loop_data; + XMA_LOOP_DATA loop_data; }; -static_assert_size(X_XMA_CONTEXT_INIT, 56); +static_assert_size(XMA_CONTEXT_INIT, 56); SHIM_CALL XMAInitializeContext_shim(PPCContext* ppc_context, KernelState* kernel_state) { @@ -126,10 +126,10 @@ SHIM_CALL XMAInitializeContext_shim(PPCContext* ppc_context, XELOGD("XMAInitializeContext(%.8X, %.8X)", context_ptr, context_init_ptr); - std::memset(SHIM_MEM_ADDR(context_ptr), 0, sizeof(XMAContextData)); + std::memset(SHIM_MEM_ADDR(context_ptr), 0, sizeof(XMA_CONTEXT_DATA)); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); - auto context_init = (X_XMA_CONTEXT_INIT*)SHIM_MEM_ADDR(context_init_ptr); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); + auto context_init = (XMA_CONTEXT_INIT*)SHIM_MEM_ADDR(context_init_ptr); context.input_buffer_0_ptr = context_init->input_buffer_0_ptr; 
context.input_buffer_0_packet_count = context_init->input_buffer_0_packet_count; @@ -164,8 +164,8 @@ SHIM_CALL XMASetLoopData_shim(PPCContext* ppc_context, XELOGD("XMASetLoopData(%.8X, %.8X)", context_ptr, loop_data_ptr); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); - auto loop_data = (X_XMA_LOOP_DATA*)SHIM_MEM_ADDR(loop_data_ptr); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); + auto loop_data = (XMA_CONTEXT_DATA*)SHIM_MEM_ADDR(loop_data_ptr); context.loop_start = loop_data->loop_start; context.loop_end = loop_data->loop_end; @@ -184,7 +184,7 @@ SHIM_CALL XMAGetInputBufferReadOffset_shim(PPCContext* ppc_context, XELOGD("XMAGetInputBufferReadOffset(%.8X)", context_ptr); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); uint32_t result = context.input_buffer_read_offset; @@ -198,7 +198,7 @@ SHIM_CALL XMASetInputBufferReadOffset_shim(PPCContext* ppc_context, XELOGD("XMASetInputBufferReadOffset(%.8X, %.8X)", context_ptr, value); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); context.input_buffer_read_offset = value; @@ -216,7 +216,7 @@ SHIM_CALL XMASetInputBuffer0_shim(PPCContext* ppc_context, XELOGD("XMASetInputBuffer0(%.8X, %.8X, %d)", context_ptr, buffer_ptr, block_count); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); context.input_buffer_0_ptr = buffer_ptr; context.input_buffer_0_packet_count = block_count; @@ -234,7 +234,7 @@ SHIM_CALL XMAIsInputBuffer0Valid_shim(PPCContext* ppc_context, XELOGD("XMAIsInputBuffer0Valid(%.8X)", context_ptr); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); uint32_t result = context.input_buffer_0_valid; @@ -247,7 +247,7 @@ SHIM_CALL XMASetInputBuffer0Valid_shim(PPCContext* ppc_context, XELOGD("XMASetInputBuffer0Valid(%.8X)", context_ptr); - XMAContextData 
context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); context.input_buffer_0_valid = 1; @@ -265,7 +265,7 @@ SHIM_CALL XMASetInputBuffer1_shim(PPCContext* ppc_context, XELOGD("XMASetInputBuffer1(%.8X, %.8X, %d)", context_ptr, buffer_ptr, block_count); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); context.input_buffer_1_ptr = buffer_ptr; context.input_buffer_1_packet_count = block_count; @@ -283,7 +283,7 @@ SHIM_CALL XMAIsInputBuffer1Valid_shim(PPCContext* ppc_context, XELOGD("XMAIsInputBuffer1Valid(%.8X)", context_ptr); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); uint32_t result = context.input_buffer_1_valid; @@ -296,7 +296,7 @@ SHIM_CALL XMASetInputBuffer1Valid_shim(PPCContext* ppc_context, XELOGD("XMASetInputBuffer1Valid(%.8X)", context_ptr); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); context.input_buffer_1_valid = 1; @@ -311,7 +311,7 @@ SHIM_CALL XMAIsOutputBufferValid_shim(PPCContext* ppc_context, XELOGD("XMAIsOutputBufferValid(%.8X)", context_ptr); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); uint32_t result = context.output_buffer_valid; @@ -324,7 +324,7 @@ SHIM_CALL XMASetOutputBufferValid_shim(PPCContext* ppc_context, XELOGD("XMASetOutputBufferValid(%.8X)", context_ptr); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); context.output_buffer_valid = 1; @@ -339,7 +339,7 @@ SHIM_CALL XMAGetOutputBufferReadOffset_shim(PPCContext* ppc_context, XELOGD("XMAGetOutputBufferReadOffset(%.8X)", context_ptr); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); uint32_t result = context.output_buffer_read_offset; @@ -353,7 +353,7 @@ SHIM_CALL 
XMASetOutputBufferReadOffset_shim(PPCContext* ppc_context, XELOGD("XMASetOutputBufferReadOffset(%.8X, %.8X)", context_ptr, value); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); context.output_buffer_read_offset = value; @@ -368,7 +368,7 @@ SHIM_CALL XMAGetOutputBufferWriteOffset_shim(PPCContext* ppc_context, XELOGD("XMAGetOutputBufferWriteOffset(%.8X)", context_ptr); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); uint32_t result = context.output_buffer_write_offset; @@ -381,7 +381,7 @@ SHIM_CALL XMAGetPacketMetadata_shim(PPCContext* ppc_context, XELOGD("XMAGetPacketMetadata(%.8X)", context_ptr); - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); uint32_t result = context.packet_metadata; @@ -408,8 +408,8 @@ SHIM_CALL XMADisableContext_shim(PPCContext* ppc_context, X_HRESULT result = X_E_SUCCESS; StoreXmaContextIndexedRegister(kernel_state, 0x1A40, context_ptr); - if (!kernel_state->emulator()->audio_system()->BlockOnXmaContext(context_ptr, - !wait)) { + if (!kernel_state->emulator()->xma_decoder()->BlockOnContext(context_ptr, + !wait)) { result = X_E_FALSE; } @@ -423,7 +423,7 @@ SHIM_CALL XMABlockWhileInUse_shim(PPCContext* ppc_context, XELOGD("XMABlockWhileInUse(%.8X)", context_ptr); do { - XMAContextData context(SHIM_MEM_ADDR(context_ptr)); + XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr)); if (!context.input_buffer_0_valid && !context.input_buffer_1_valid) { break; }