diff --git a/.gitmodules b/.gitmodules
index bd40f6179..5757ad53b 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -19,3 +19,6 @@
 [submodule "build_tools"]
 	path = build_tools
 	url = https://github.com/xenia-project/build-tools.git
+[submodule "third_party/libav"]
+	path = third_party/libav
+	url = https://github.com/xenia-project/libav.git
diff --git a/src/xenia/apu/xma_context.cc b/src/xenia/apu/xma_context.cc
index 7fbe08f17..2bc1c6e55 100644
--- a/src/xenia/apu/xma_context.cc
+++ b/src/xenia/apu/xma_context.cc
@@ -13,13 +13,17 @@
 #include <cstring>
 
 #include "xenia/apu/xma_decoder.h"
+#include "xenia/apu/xma_helpers.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/ring_buffer.h"
 #include "xenia/profiling.h"
 
 extern "C" {
 #include "libavcodec/avcodec.h"
+#include "libavcodec/xma2dec.h"
 #include "libavutil/channel_layout.h"
+
+extern AVCodec ff_xma2_decoder;
 }  // extern "C"
 
 // Credits for most of this code goes to:
@@ -50,14 +54,8 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
   memory_ = memory;
   guest_ptr_ = guest_ptr;
 
-  static bool avcodec_initialized = false;
-  if (!avcodec_initialized) {
-    avcodec_register_all();
-    avcodec_initialized = true;
-  }
-
   // Allocate important stuff.
-  codec_ = avcodec_find_decoder(AV_CODEC_ID_WMAPRO);
+  codec_ = &ff_xma2_decoder;
   if (!codec_) {
     return 1;
   }
@@ -91,7 +89,7 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
 
   // Current frame stuff whatever
   // samples per frame * 2 max channels * output bytes
-  current_frame_ = new uint8_t[kSamplesPerFrame * 2 * 2];
+  current_frame_ = new uint8_t[kSamplesPerFrame * kBytesPerSample * 2];
   current_frame_pos_ = 0;
   frame_samples_size_ = 0;
 
@@ -119,11 +117,10 @@ void XmaContext::Enable() {
   auto context_ptr = memory()->TranslateVirtual(guest_ptr());
   XMA_CONTEXT_DATA data(context_ptr);
 
-  XELOGAPU(
-      "XmaContext: kicking context %d (%d/%d bytes)", id(),
-      (data.input_buffer_read_offset & ~0x7FF) / 8,
-      (data.input_buffer_0_packet_count + data.input_buffer_1_packet_count) *
-          kBytesPerPacket);
+  XELOGAPU("XmaContext: kicking context %d (%d/%d bits)", id(),
+           data.input_buffer_read_offset, (data.input_buffer_0_packet_count +
+                                           data.input_buffer_1_packet_count) *
+                                              kBytesPerPacket * 8);
 
   data.Store(context_ptr);
 
@@ -227,6 +224,26 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
     return;
   }
 
+  // XAudio Loops
+  // loop_count:
+  //  - XAUDIO2_MAX_LOOP_COUNT = 254
+  //  - XAUDIO2_LOOP_INFINITE = 255
+  // loop_start/loop_end are bit offsets to a specific frame
+  // assert_true(data->loop_count == 0);
+
+  // Translate pointers for future use.
+  uint8_t* in0 = data->input_buffer_0_valid
+                     ? memory()->TranslatePhysical(data->input_buffer_0_ptr)
+                     : nullptr;
+  uint8_t* in1 = data->input_buffer_1_valid
+                     ? memory()->TranslatePhysical(data->input_buffer_1_ptr)
+                     : nullptr;
+
+  size_t input_buffer_0_size =
+      data->input_buffer_0_packet_count * kBytesPerPacket;
+  size_t input_buffer_1_size =
+      data->input_buffer_1_packet_count * kBytesPerPacket;
+
   // Output buffers are in raw PCM samples, 256 bytes per block.
   // Output buffer is a ring buffer. We need to write from the write offset
   // to the read offset.
@@ -246,76 +263,175 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
 
   // Decode until we can't write any more data.
   while (output_remaining_bytes > 0) {
-    // This'll copy audio samples into the output buffer.
-    // The samples need to be 2 bytes long!
-    // Copies one frame at a time, so keep calling this until size == 0.
-    int written_bytes = 0;
-    int decode_attempts_remaining = 3;
-
-    uint8_t work_buffer[kOutputMaxSizeBytes];
-    while (decode_attempts_remaining) {
-      size_t read_bytes = 0;
-      written_bytes =
-          DecodePacket(work_buffer, 0, output_remaining_bytes, &read_bytes);
-      if (written_bytes >= 0) {
-        // assert_true((written_bytes % 256) == 0);
-        auto written_bytes_rb = output_rb.Write(work_buffer, written_bytes);
-        assert_true(written_bytes == written_bytes_rb);
-
-        // Ok.
-        break;
-      } else if (read_bytes % 2048 == 0) {
-        // Sometimes the decoder will fail on a packet. I think it's
-        // looking for cross-packet frames and failing. If you run it again
-        // on the same packet it'll work though.
-        --decode_attempts_remaining;
-      } else {
-        // Failed in the middle of a packet, do not retry!
-        decode_attempts_remaining = 0;
-        break;
-      }
-    }
-
-    if (!decode_attempts_remaining) {
-      XELOGAPU("XmaContext: libav failed to decode packet (returned %.8X)",
-               -written_bytes);
-
-      // Failed out.
-      if (data->input_buffer_0_valid || data->input_buffer_1_valid) {
-        // There's new data available - maybe we'll be ok if we decode it?
-        written_bytes = 0;
-        DiscardPacket();
-      } else {
-        // No data and hosed - bail.
-        break;
-      }
-    }
-
-    data->output_buffer_write_offset = output_rb.write_offset() / 256;
-    output_remaining_bytes -= written_bytes;
-
-    // If we need more data and the input buffers have it, grab it.
-    if (written_bytes) {
-      // Haven't finished with current packet.
-      continue;
-    } else if (data->input_buffer_0_valid || data->input_buffer_1_valid) {
-      // Done with previous packet, so grab a new one.
-      int ret = StartPacket(data);
-      if (ret <= 0) {
-        // No more data (but may have prepared a packet)
-        data->input_buffer_0_valid = 0;
-        data->input_buffer_1_valid = 0;
-      }
-    } else {
-      // Decoder is out of data and there's no more to give.
+    if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
+      // Out of data.
       break;
     }
+
+    int num_channels = data->is_stereo ? 2 : 1;
+
+    // Flush the rest of a partially written frame first (only valid if the
+    // game hasn't moved the input read offset since). Resume at the saved pos.
+    if (current_frame_pos_ &&
+        last_input_read_pos_ == data->input_buffer_read_offset) {
+      size_t to_write = std::min(
+          output_remaining_bytes,
+          ((size_t)kBytesPerFrame * num_channels - current_frame_pos_));
+      output_rb.Write(current_frame_ + current_frame_pos_, to_write);
+
+      current_frame_pos_ += to_write;
+      if (current_frame_pos_ >= kBytesPerFrame * num_channels) {
+        current_frame_pos_ = 0;
+      }
+
+      data->output_buffer_write_offset = output_rb.write_offset() / 256;
+      output_remaining_bytes -= to_write;
+      continue;
+    }
+
+    int block_last_frame = 0;  // last frame in block?
+    int got_frame = 0;         // successfully decoded a frame?
+    int frame_size = 0;
+    packet_->data = in0;
+    packet_->size = data->input_buffer_0_packet_count * 2048;
+    PrepareDecoder(in0, data->input_buffer_0_packet_count * 2048,
+                   data->sample_rate, num_channels);
+    int len = xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame,
+                                &block_last_frame, &frame_size,
+                                data->input_buffer_read_offset);
+    if (block_last_frame) {
+      data->input_buffer_0_valid = 0;
+      data->input_buffer_1_valid = 0;
+      data->output_buffer_valid = 0;
+      continue;
+    }
+
+    if (len == AVERROR_EOF) {
+      // End of stream: invalidate the buffers so the loop exits.
+      data->input_buffer_0_valid = 0;
+      data->input_buffer_1_valid = 0;
+      data->output_buffer_valid = 0;
+
+      continue;
+    } else if (len < 0 || !got_frame) {
+      // Decode failed: advance the read offset by frame_size and retry.
+      data->input_buffer_read_offset += frame_size;
+
+      continue;
+    }
+
+    XELOGD("LEN: %d (%x)", len, len);
+
+    data->input_buffer_read_offset += len;
+    last_input_read_pos_ = data->input_buffer_read_offset;
+
+    // Copy to the output buffer.
+    // Successfully decoded a frame.
+    size_t written_bytes = 0;
+    if (got_frame) {
+      // Validity checks.
+      if (decoded_frame_->nb_samples > kSamplesPerFrame) {
+        return;
+      } else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) {
+        return;
+      }
+
+      // Check the returned buffer size.
+      if (av_samples_get_buffer_size(NULL, context_->channels,
+                                     decoded_frame_->nb_samples,
+                                     context_->sample_fmt, 1) !=
+          context_->channels * decoded_frame_->nb_samples * sizeof(float)) {
+        return;
+      }
+
+      // Loop through every sample, convert and drop it into the output array.
+      // If more than one channel, the game wants the samples from each channel
+      // interleaved next to each other.
+      uint32_t o = 0;
+      for (int i = 0; i < decoded_frame_->nb_samples; i++) {
+        for (int j = 0; j < context_->channels; j++) {
+          // Select the appropriate array based on the current channel.
+          auto sample_array = reinterpret_cast<float*>(decoded_frame_->data[j]);
+
+          // Raw sample should be within [-1, 1].
+          // Clamp it, just in case.
+          float raw_sample = xe::saturate(sample_array[i]);
+
+          // Convert the sample and output it in big endian.
+          float scaled_sample = raw_sample * ((1 << 15) - 1);
+          int sample = static_cast<int>(scaled_sample);
+          xe::store_and_swap<uint16_t>(&current_frame_[o++ * 2],
+                                       sample & 0xFFFF);
+        }
+      }
+      current_frame_pos_ = 0;
+
+      if (output_remaining_bytes < kBytesPerFrame * num_channels) {
+        // Output buffer isn't big enough to store the entire frame! Write out a
+        // part of it.
+        current_frame_pos_ = output_remaining_bytes;
+        output_rb.Write(current_frame_, output_remaining_bytes);
+
+        written_bytes = output_remaining_bytes;
+      } else {
+        output_rb.Write(current_frame_, kBytesPerFrame * num_channels);
+
+        written_bytes = kBytesPerFrame * num_channels;
+      }
+    }
+
+    output_remaining_bytes -= written_bytes;
+    data->output_buffer_write_offset = output_rb.write_offset() / 256;
   }
 
   // The game will kick us again with a new output buffer later.
   data->output_buffer_valid = 0;
 }
 
+uint32_t XmaContext::GetFramePacketNumber(uint8_t* block, size_t size,
+                                          size_t bit_offset) {
+  size *= 8;
+  if (bit_offset >= size) {
+    // Bit offset lies outside the block.
+    assert_always();
+    return -1;
+  }
+
+  size_t byte_offset = bit_offset >> 3;
+  size_t packet_number = byte_offset / kBytesPerPacket;
+
+  return (uint32_t)packet_number;
+}
+
+int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
+                               int channels) {
+  // Sanity check: Packet metadata is always 1 for XMA2
+  assert_true((block[2] & 0x7) == 1);
+
+  sample_rate = GetSampleRate(sample_rate);
+
+  // Re-initialize the context with new sample rate and channels.
+  if (context_->sample_rate != sample_rate || context_->channels != channels) {
+    // We have to reopen the codec so it'll realloc whatever data it needs.
+    // TODO(DrChat): Find a better way.
+    avcodec_close(context_);
+
+    context_->sample_rate = sample_rate;
+    context_->channels = channels;
+    extra_data_.channel_mask =
+        channels == 2 ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
+
+    if (avcodec_open2(context_, codec_, NULL) < 0) {
+      XELOGE("XmaContext: Failed to reopen libav context");
+      return 1;
+    }
+  }
+
+  av_frame_unref(decoded_frame_);
+
+  return 0;
+}
+
 int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
   // Translate pointers for future use.
   uint8_t* in0 = data->input_buffer_0_valid
@@ -340,41 +456,49 @@ int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
   // Total input size
   uint32_t input_size_bytes = input_size_0_bytes + input_size_1_bytes;
 
+  // Calculate the first frame offset we need to decode.
+  uint32_t frame_offset_bits = (data->input_buffer_read_offset % (2048 * 8));
+
   // Input read offset is in bits. Typically starts at 32 (4 bytes).
   // "Sequence" offset - used internally for WMA Pro decoder.
   // Just the read offset.
-  uint32_t seq_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8;
-  uint32_t input_remaining_bytes = input_size_bytes - seq_offset_bytes;
+  // NOTE: Read offset may not be at the first frame in a packet!
+  uint32_t packet_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8;
+  if (packet_offset_bytes % 2048 != 0) {
+    packet_offset_bytes -= packet_offset_bytes % 2048;
+  }
+  uint32_t input_remaining_bytes = input_size_bytes - packet_offset_bytes;
 
-  if (seq_offset_bytes < input_size_bytes) {
-    // Setup input offset and input buffer.
-    uint32_t input_offset_bytes = seq_offset_bytes;
-    auto input_buffer = in0;
-
-    if (seq_offset_bytes >= input_size_0_bytes && input_size_1_bytes) {
-      // Size overlap, select input buffer 1.
-      // TODO(DrChat): This needs testing.
-      input_offset_bytes -= input_size_0_bytes;
-      input_buffer = in1;
-    }
-
-    // Still have data to read.
-    auto packet = input_buffer + input_offset_bytes;
-    assert_true(input_offset_bytes % 2048 == 0);
-    PreparePacket(packet, seq_offset_bytes, kBytesPerPacket, sample_rate,
-                  channels);
-    data->input_buffer_read_offset += kBytesPerPacket * 8;
-
-    input_remaining_bytes -= kBytesPerPacket;
-    if (input_remaining_bytes <= 0) {
-      // Used the last of the data but prepared a packet.
-      return 0;
-    }
-  } else {
+  if (packet_offset_bytes >= input_size_bytes) {
     // No more data available and no packet prepared.
     return -1;
   }
 
+  // Setup input offset and input buffer.
+  uint32_t input_offset_bytes = packet_offset_bytes;
+  auto input_buffer = in0;
+
+  if (packet_offset_bytes >= input_size_0_bytes && input_size_1_bytes) {
+    // Size overlap, select input buffer 1.
+    // TODO(DrChat): This needs testing.
+    input_offset_bytes -= input_size_0_bytes;
+    input_buffer = in1;
+  }
+
+  // Still have data to read.
+  auto packet = input_buffer + input_offset_bytes;
+  assert_true(input_offset_bytes % 2048 == 0);
+  PreparePacket(packet, packet_offset_bytes, kBytesPerPacket, sample_rate,
+                channels);
+
+  data->input_buffer_read_offset += kBytesPerPacket * 8;
+
+  input_remaining_bytes -= kBytesPerPacket;
+  if (input_remaining_bytes <= 0) {
+    // Used the last of the data but prepared a packet.
+    return 0;
+  }
+
   return input_remaining_bytes;
 }
 
@@ -390,15 +514,11 @@ int XmaContext::PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
     return 1;
   }
 
-  std::memcpy(packet_data_, input, size);
+  // Packet metadata is always 1 for XMA2
+  assert_true((input[2] & 0x7) == 1);
 
-  // Modify the packet header so it's WMAPro compatible.
-  auto int_packet_data = reinterpret_cast<int*>(packet_data_);
-  *int_packet_data =
-      (((seq_offset & 0x7800) | 0x400) >> 7) | (*int_packet_data & 0xFFFEFF08);
-
-  packet_->data = packet_data_;
-  packet_->size = kBytesPerPacket;
+  packet_->data = input;
+  packet_->size = (int)size;
 
   // Re-initialize the context with new sample rate and channels.
   if (context_->sample_rate != sample_rate || context_->channels != channels) {
diff --git a/src/xenia/apu/xma_context.h b/src/xenia/apu/xma_context.h
index 06e2299b8..7298ab168 100644
--- a/src/xenia/apu/xma_context.h
+++ b/src/xenia/apu/xma_context.h
@@ -60,8 +60,8 @@ struct XMA_CONTEXT_DATA {
   uint32_t input_buffer_1_packet_count : 12;  // XMASetInputBuffer1, number of
                                               // 2KB packets. Max 4095 packets.
                                               // These packets form a block.
-  uint32_t loop_subframe_end : 2;             // +12bit, XMASetLoopData
-  uint32_t unk_dword_1_a : 3;                 // ? might be loop_subframe_skip
+  uint32_t loop_subframe_start : 2;           // +12bit, XMASetLoopData
+  uint32_t loop_subframe_end : 3;             // +14bit, XMASetLoopData
   uint32_t loop_subframe_skip : 3;            // +17bit, XMASetLoopData might be
                                               // subframe_decode_count
   uint32_t subframe_decode_count : 4;  // +20bit might be subframe_skip_count
@@ -91,7 +91,7 @@ struct XMA_CONTEXT_DATA {
   // DWORD 7
   uint32_t output_buffer_ptr;  // physical address
   // DWORD 8
-  uint32_t overlap_add_ptr;  // PtrOverlapAdd(?)
+  uint32_t work_buffer_ptr;  // PtrOverlapAdd(?)
 
   // DWORD 9
   // +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead
@@ -133,6 +133,7 @@ class XmaContext {
   static const uint32_t kBytesPerSample = 2;
   static const uint32_t kSamplesPerFrame = 512;
   static const uint32_t kSamplesPerSubframe = 128;
+  static const uint32_t kBytesPerFrame = kSamplesPerFrame * kBytesPerSample;
   static const uint32_t kBytesPerSubframe =
       kSamplesPerSubframe * kBytesPerSample;
 
@@ -165,6 +166,10 @@ class XmaContext {
   static int GetSampleRate(int id);
 
   void DecodePackets(XMA_CONTEXT_DATA* data);
+  uint32_t GetFramePacketNumber(uint8_t* block, size_t size, size_t bit_offset);
+  int PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
+                     int channels);
+
   int StartPacket(XMA_CONTEXT_DATA* data);
 
   int PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
@@ -189,11 +194,11 @@ class XmaContext {
   AVPacket* packet_ = nullptr;
   WmaProExtraData extra_data_;
 
+  // Byte offset into current_frame_ of the data not yet written to the output.
   size_t current_frame_pos_ = 0;
+  uint32_t last_input_read_pos_ = 0;  // Last seen read buffer pos
   uint8_t* current_frame_ = nullptr;
   uint32_t frame_samples_size_ = 0;
-
-  uint8_t packet_data_[kBytesPerPacket];
 };
 
 }  // namespace apu
diff --git a/src/xenia/kernel/xboxkrnl_audio_xma.cc b/src/xenia/kernel/xboxkrnl_audio_xma.cc
index a7fec8cb7..e664e7ca4 100644
--- a/src/xenia/kernel/xboxkrnl_audio_xma.cc
+++ b/src/xenia/kernel/xboxkrnl_audio_xma.cc
@@ -171,7 +171,7 @@ SHIM_CALL XMASetLoopData_shim(PPCContext* ppc_context,
   context.loop_end = loop_data->loop_end;
   context.loop_count = loop_data->loop_count;
   context.loop_subframe_end = loop_data->loop_subframe_end;
-  context.loop_subframe_skip = loop_data->loop_subframe_end;
+  context.loop_subframe_skip = loop_data->loop_subframe_skip;
 
   context.Store(SHIM_MEM_ADDR(context_ptr));
 
diff --git a/third_party/libav b/third_party/libav
new file mode 160000
index 000000000..4752bdcdb
--- /dev/null
+++ b/third_party/libav
@@ -0,0 +1 @@
+Subproject commit 4752bdcdbaf61afc933e667a016bca4eb389ac21