diff --git a/src/xenia/apu/debug_visualizers.natvis b/src/xenia/apu/debug_visualizers.natvis new file mode 100644 index 000000000..095fce6ad --- /dev/null +++ b/src/xenia/apu/debug_visualizers.natvis @@ -0,0 +1,9 @@ + + + + + + id={id_}, allocated={is_allocated_}, enabled={is_enabled_} + + + diff --git a/src/xenia/apu/xma_context.cc b/src/xenia/apu/xma_context.cc index 2bc1c6e55..17f06cabc 100644 --- a/src/xenia/apu/xma_context.cc +++ b/src/xenia/apu/xma_context.cc @@ -215,6 +215,7 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { // Quick die if there's no data. if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) { + XELOGAPU("Context %d: No valid input buffers!", id()); return; } @@ -224,25 +225,36 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { return; } + assert_zero(data->unk_dword_9); + // XAudio Loops // loop_count: // - XAUDIO2_MAX_LOOP_COUNT = 254 // - XAUDIO2_LOOP_INFINITE = 255 // loop_start/loop_end are bit offsets to a specific frame - //assert_true(data->loop_count == 0); // Translate pointers for future use. + // Sometimes the game will use rolling input buffers. If they do, we cannot + // assume they form a complete block! In addition, the buffers DO NOT have + // to be sequential! + // (bit.trip runner 2 does this) + // TODO: Collect partial frames into a buffer if the game uses rolling buffers, + // and present the full frame to libav when we get it. uint8_t* in0 = data->input_buffer_0_valid ? memory()->TranslatePhysical(data->input_buffer_0_ptr) : nullptr; uint8_t* in1 = data->input_buffer_1_valid ? memory()->TranslatePhysical(data->input_buffer_1_ptr) : nullptr; + uint8_t* current_input_buffer = in0; size_t input_buffer_0_size = data->input_buffer_0_packet_count * kBytesPerPacket; size_t input_buffer_1_size = data->input_buffer_1_packet_count * kBytesPerPacket; + size_t current_input_size = + data->current_buffer ? input_buffer_1_size : input_buffer_0_size; + size_t input_total_size = input_buffer_0_size + input_buffer_1_size; // Output buffers are in raw PCM samples, 256 bytes per block. // Output buffer is a ring buffer. We need to write from the write offset @@ -289,50 +301,111 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { continue; } - int block_last_frame = 0; // last frame in block? - int got_frame = 0; // successfully decoded a frame? + int invalid_frame = 0; // invalid frame? + int got_frame = 0; // successfully decoded a frame? int frame_size = 0; - packet_->data = in0; - packet_->size = data->input_buffer_0_packet_count * 2048; - PrepareDecoder(in0, data->input_buffer_0_packet_count * 2048, - data->sample_rate, num_channels); + packet_->data = current_input_buffer; + packet_->size = (int)current_input_size; + PrepareDecoder(in0, current_input_size, data->sample_rate, num_channels); int len = xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame, - &block_last_frame, &frame_size, + &invalid_frame, &frame_size, 1, data->input_buffer_read_offset); - if (block_last_frame) { - data->input_buffer_0_valid = 0; - data->input_buffer_1_valid = 0; - data->output_buffer_valid = 0; - continue; + if (invalid_frame) { + // Invalid frame/packet: length header is 0x7FFF + // Sometimes there's frames in the middle of the stream flagged as + // invalid. + // Double-check to make sure we're not in the middle. + uint32_t frame_byte_offset = data->input_buffer_read_offset >> 3; + uint32_t packet_number = frame_byte_offset / 2048; + if (packet_number < data->input_buffer_0_packet_count - 1) { + // Okay. Skip to the beginning of the next packet. + packet_number++; + data->input_buffer_read_offset = (packet_number * 2048 * 8) + 32; + continue; + } + + // Last frame of the block. Swap buffers if necessary. + if (data->current_buffer == 0) { + if (data->input_buffer_1_valid) { + data->current_buffer++; + } else { + // End of input. + data->input_buffer_read_offset = input_total_size * 8; + } + + data->input_buffer_0_valid = 0; + return; + } else { + // End of input. + data->current_buffer = 0; + data->input_buffer_1_valid = 0; + data->input_buffer_read_offset = input_total_size * 8; + return; + } + } else if (got_frame && len > 0) { + // Valid frame. + // Check and see if we need to loop back to any spot. + if (data->loop_count > 0 && + data->input_buffer_read_offset == data->loop_end) { + // Loop back to the beginning. + data->input_buffer_read_offset = data->loop_start; + if (data->loop_count < 255) { + data->loop_count--; + } + } else { + data->input_buffer_read_offset += len; + if (data->current_buffer == 0 && + data->input_buffer_read_offset > input_buffer_0_size * 8) { + // Overflow? Setup next buffer. + data->current_buffer++; + data->input_buffer_0_valid = 0; + } else if (data->input_buffer_read_offset > input_total_size * 8) { + // Overflow! The game will fix up the read offset. + data->current_buffer = 0; + data->input_buffer_0_valid = 0; + data->input_buffer_1_valid = 0; + } + } } - if (len == AVERROR_EOF) { - // Screw this gtfo - data->input_buffer_0_valid = 0; - data->input_buffer_1_valid = 0; - data->output_buffer_valid = 0; + if ((len < 0 || !got_frame) && frame_size != 0) { + // Oh no! Skip the frame and hope everything works. + data->input_buffer_read_offset += frame_size; + data->input_buffer_read_offset = (uint32_t)xma2_correct_frame_offset( + in0, input_buffer_0_size, data->input_buffer_read_offset); continue; } else if (len < 0 || !got_frame) { - // Oh no! Skip the frame and hope everything works. - data->input_buffer_read_offset += frame_size; - - continue; + // Did not get frame and could not get frame size. + data->input_buffer_0_valid = 0; + data->input_buffer_1_valid = 0; + return; } - XELOGD("LEN: %d (%x)", len, len); - - data->input_buffer_read_offset += len; + // Sometimes we may run up to <15 bits before the next packet. If this + // happens, we need to automatically advance to the next frame. + // We'll ask the XMA2 decoder to do this for us, since it's more qualified. + data->input_buffer_read_offset = (uint32_t)xma2_correct_frame_offset( + in0, input_buffer_0_size, data->input_buffer_read_offset); last_input_read_pos_ = data->input_buffer_read_offset; + if (data->input_buffer_read_offset == 0) { + // Invalid offset. Out of data. + data->input_buffer_0_valid = 0; + data->input_buffer_1_valid = 0; + } + // Copy to the output buffer. // Successfully decoded a frame. size_t written_bytes = 0; if (got_frame) { +#ifdef DEBUG // Validity checks. if (decoded_frame_->nb_samples > kSamplesPerFrame) { + XELOGAPU("Decoded frame has an invalid sample count!"); return; } else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) { + XELOGAPU("libav decoder did not output floating point samples!"); return; } @@ -343,27 +416,11 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { context_->channels * decoded_frame_->nb_samples * sizeof(float)) { return; } +#endif - // Loop through every sample, convert and drop it into the output array. - // If more than one channel, the game wants the samples from each channel - // interleaved next to each other. - uint32_t o = 0; - for (int i = 0; i < decoded_frame_->nb_samples; i++) { - for (int j = 0; j < context_->channels; j++) { - // Select the appropriate array based on the current channel. - auto sample_array = reinterpret_cast(decoded_frame_->data[j]); - - // Raw sample should be within [-1, 1]. - // Clamp it, just in case. - float raw_sample = xe::saturate(sample_array[i]); - - // Convert the sample and output it in big endian. - float scaled_sample = raw_sample * ((1 << 15) - 1); - int sample = static_cast(scaled_sample); - xe::store_and_swap(¤t_frame_[o++ * 2], - sample & 0xFFFF); - } - } + // Convert the frame. + ConvertFrame((const float**)decoded_frame_->data, context_->channels, + decoded_frame_->nb_samples, current_frame_); current_frame_pos_ = 0; if (output_remaining_bytes < kBytesPerFrame * num_channels) { @@ -385,6 +442,7 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { } // The game will kick us again with a new output buffer later. + // It's important that we only invalidate this if we actually wrote to it!! data->output_buffer_valid = 0; } @@ -405,8 +463,8 @@ uint32_t XmaContext::GetFramePacketNumber(uint8_t* block, size_t size, int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate, int channels) { - // Sanity check: Packet metadata is always 1 for XMA2 - assert_true((block[2] & 0x7) == 1); + // Sanity check: Packet metadata is always 1 for XMA2/0 for XMA + assert_true((block[2] & 0x7) == 1 || (block[2] & 0x7) == 0); sample_rate = GetSampleRate(sample_rate); @@ -432,6 +490,32 @@ int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate, return 0; } +bool XmaContext::ConvertFrame(const float** samples, int num_channels, + int num_samples, uint8_t* output_buffer) { + // Loop through every sample, convert and drop it into the output array. + // If more than one channel, we need to interleave the samples from each + // channel next to each other. + // TODO: This can definitely be optimized with AVX/SSE intrinsics! + uint32_t o = 0; + for (int i = 0; i < num_samples; i++) { + for (int j = 0; j < num_channels; j++) { + // Select the appropriate array based on the current channel. + auto sample_array = samples[j]; + + // Raw sample should be within [-1, 1]. + // Clamp it, just in case. + float raw_sample = xe::saturate(sample_array[i]); + + // Convert the sample and output it in big endian. + float scaled_sample = raw_sample * ((1 << 15) - 1); + int sample = static_cast(scaled_sample); + xe::store_and_swap(&output_buffer[o++ * 2], sample & 0xFFFF); + } + } + + return true; +} + int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) { // Translate pointers for future use. uint8_t* in0 = data->input_buffer_0_valid diff --git a/src/xenia/apu/xma_context.h b/src/xenia/apu/xma_context.h index 7298ab168..a0f769f38 100644 --- a/src/xenia/apu/xma_context.h +++ b/src/xenia/apu/xma_context.h @@ -60,8 +60,8 @@ struct XMA_CONTEXT_DATA { uint32_t input_buffer_1_packet_count : 12; // XMASetInputBuffer1, number of // 2KB packets. Max 4095 packets. // These packets form a block. - uint32_t loop_subframe_start : 2; // +12bit, XMASetLoopData - uint32_t loop_subframe_end : 3; // +14bit, XMASetLoopData + uint32_t loop_subframe_start : 2; // +12bit, XMASetLoopData + uint32_t loop_subframe_end : 3; // +14bit, XMASetLoopData uint32_t loop_subframe_skip : 3; // +17bit, XMASetLoopData might be // subframe_decode_count uint32_t subframe_decode_count : 4; // +20bit might be subframe_skip_count @@ -77,10 +77,12 @@ struct XMA_CONTEXT_DATA { // DWORD 3 uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset + // frame offset in bits uint32_t unk_dword_3 : 6; // ? ParserErrorStatus/ParserErrorSet(?) // DWORD 4 uint32_t loop_end : 26; // XMASetLoopData LoopEndOffset + // frame offset in bits uint32_t packet_metadata : 5; // XMAGetPacketMetadata uint32_t current_buffer : 1; // ? @@ -170,6 +172,9 @@ class XmaContext { int PrepareDecoder(uint8_t* block, size_t size, int sample_rate, int channels); + bool ConvertFrame(const float** samples, int num_channels, int num_samples, + uint8_t* output_buffer); + int StartPacket(XMA_CONTEXT_DATA* data); int PreparePacket(uint8_t* input, size_t seq_offset, size_t size, diff --git a/src/xenia/apu/xma_decoder.cc b/src/xenia/apu/xma_decoder.cc index d8bd47ef0..766ce231b 100644 --- a/src/xenia/apu/xma_decoder.cc +++ b/src/xenia/apu/xma_decoder.cc @@ -55,7 +55,34 @@ XmaDecoder::XmaDecoder(cpu::Processor* processor) XmaDecoder::~XmaDecoder() = default; void av_log_callback(void* avcl, int level, const char* fmt, va_list va) { - xe::LogLineVarargs('A', fmt, va); +#ifdef NDEBUG + if (level > AV_LOG_WARNING) { + return; + } +#endif + + char level_char = '?'; + switch (level) { + case AV_LOG_ERROR: + level_char = '!'; + break; + case AV_LOG_WARNING: + level_char = 'w'; + break; + case AV_LOG_INFO: + level_char = 'i'; + break; + case AV_LOG_VERBOSE: + level_char = 'v'; + break; + case AV_LOG_DEBUG: + level_char = 'd'; + break; + } + + StringBuffer buff; + buff.AppendVarargs(fmt, va); + xe::LogLineVarargs(level_char, "libav: %s", buff.GetString()); } X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) { @@ -86,7 +113,7 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) { } registers_.next_context = 1; - worker_running_ = true; + //worker_running_ = true; worker_thread_ = kernel::object_ref( new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() { WorkerThreadMain(); @@ -213,6 +240,7 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) { uint32_t context_id = base_context_id + i; XmaContext& context = contexts_[context_id]; context.Enable(); + context.Work(); } } diff --git a/src/xenia/apu/xma_helpers.h b/src/xenia/apu/xma_helpers.h new file mode 100644 index 000000000..8efc9d1d1 --- /dev/null +++ b/src/xenia/apu/xma_helpers.h @@ -0,0 +1,43 @@ +/** +****************************************************************************** +* Xenia : Xbox 360 Emulator Research Project * +****************************************************************************** +* Copyright 2015 Ben Vanik. All rights reserved. * +* Released under the BSD license - see LICENSE in the root for more details. * +****************************************************************************** +*/ + +// This file contains some functions used to help parse XMA data. + +#ifndef XENIA_APU_XMA_HELPERS_H_ +#define XENIA_APU_XMA_HELPERS_H_ + +#include + +namespace xe { +namespace apu { +namespace xma { + +// Get number of frames that /begin/ in this packet. +uint32_t GetPacketFrameCount(uint8_t* packet) { + return (uint8_t)(packet[0] >> 2); +} + +// Get the first frame offset in bits +uint32_t GetPacketFrameOffset(uint8_t* packet) { + return (uint16_t)((packet[0] << 13) | (packet[1] << 5) | (packet[2] >> 3)) + 32; +} + +uint32_t GetPacketMetadata(uint8_t* packet) { + return (uint8_t)(packet[2] & 0x7); +} + +uint32_t GetPacketSkipCount(uint8_t* packet) { + return (uint8_t)(packet[3]); +} + +} // namespace xma +} // namespace apu +} // namespace xe + +#endif // XENIA_APU_XMA_HELPERS_H_ diff --git a/third_party/libav b/third_party/libav index 4752bdcdb..8be22f03d 160000 --- a/third_party/libav +++ b/third_party/libav @@ -1 +1 @@ -Subproject commit 4752bdcdbaf61afc933e667a016bca4eb389ac21 +Subproject commit 8be22f03d7e3c1663a66cc09375f840a7fc9a365