diff --git a/src/xenia/apu/xma_context.cc b/src/xenia/apu/xma_context.cc index 17f06cabc..87ac2afa8 100644 --- a/src/xenia/apu/xma_context.cc +++ b/src/xenia/apu/xma_context.cc @@ -14,6 +14,7 @@ #include "xenia/apu/xma_decoder.h" #include "xenia/apu/xma_helpers.h" +#include "xenia/base/bit_stream.h" #include "xenia/base/logging.h" #include "xenia/base/ring_buffer.h" #include "xenia/profiling.h" @@ -87,6 +88,8 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) { context_->extradata_size = sizeof(extra_data_); context_->extradata = reinterpret_cast(&extra_data_); + partial_frame_buffer_.resize(2048); + // Current frame stuff whatever // samples per frame * 2 max channels * output bytes current_frame_ = new uint8_t[kSamplesPerFrame * kBytesPerSample * 2]; @@ -98,11 +101,11 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) { } void XmaContext::Work() { + std::lock_guard lock(lock_); if (!is_allocated() || !is_enabled()) { return; } - std::lock_guard lock(lock_); set_is_enabled(false); auto context_ptr = memory()->TranslateVirtual(guest_ptr()); @@ -117,10 +120,11 @@ void XmaContext::Enable() { auto context_ptr = memory()->TranslateVirtual(guest_ptr()); XMA_CONTEXT_DATA data(context_ptr); - XELOGAPU("XmaContext: kicking context %d (%d/%d bits)", id(), - data.input_buffer_read_offset, (data.input_buffer_0_packet_count + - data.input_buffer_1_packet_count) * - kBytesPerPacket * 8); + XELOGAPU("XmaContext: kicking context %d (buffer %d %d/%d bits)", id(), + data.current_buffer, data.input_buffer_read_offset, + (data.current_buffer == 0 ? 
data.input_buffer_0_packet_count + : data.input_buffer_1_packet_count) * + kBytesPerPacket * 8); data.Store(context_ptr); @@ -142,8 +146,6 @@ void XmaContext::Clear() { std::lock_guard lock(lock_); XELOGAPU("XmaContext: reset context %d", id()); - DiscardPacket(); - auto context_ptr = memory()->TranslateVirtual(guest_ptr()); XMA_CONTEXT_DATA data(context_ptr); @@ -171,8 +173,6 @@ void XmaContext::Release() { set_is_allocated(false); auto context_ptr = memory()->TranslateVirtual(guest_ptr()); std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA)); // Zero it. - - DiscardPacket(); } int XmaContext::GetSampleRate(int id) { @@ -190,6 +190,83 @@ int XmaContext::GetSampleRate(int id) { return 0; } +size_t XmaContext::SavePartial(uint8_t* packet, uint32_t frame_offset_bits, + size_t frame_size_bits, bool append) { + uint8_t* buff = partial_frame_buffer_.data(); + + BitStream stream(packet, 2048 * 8); + stream.SetOffset(frame_offset_bits); + + if (!append) { + // Reset the buffer. + // TODO: Probably not necessary. 
+ std::memset(buff, 0, partial_frame_buffer_.size()); + + size_t copy_bits = (2048 * 8) - frame_offset_bits; + size_t copy_offset = stream.Copy(buff, copy_bits); + partial_frame_offset_bits_ = copy_bits; + partial_frame_start_offset_bits_ = copy_offset; + + return copy_bits; + } else { + size_t copy_bits = frame_size_bits - partial_frame_offset_bits_; + size_t copy_offset = stream.Copy( + buff + + ((partial_frame_offset_bits_ + partial_frame_start_offset_bits_) / + 8), + copy_bits); + + partial_frame_offset_bits_ += copy_bits; + + return copy_bits; + } +} + +bool XmaContext::ValidFrameOffset(uint8_t* block, size_t size_bytes, + size_t frame_offset_bits) { + uint32_t packet_num = + GetFramePacketNumber(block, size_bytes, frame_offset_bits); + uint8_t* packet = block + (packet_num * kBytesPerPacket); + size_t relative_offset_bits = frame_offset_bits % (kBytesPerPacket * 8); + + uint32_t first_frame_offset = xma::GetPacketFrameOffset(packet); + if (first_frame_offset == -1) { + // Packet only contains a partial frame, so no frames can start here. + return false; + } + + BitStream stream(packet, kBytesPerPacket * 8); + stream.SetOffset(first_frame_offset); + while (true) { + if (stream.offset_bits() == relative_offset_bits) { + return true; + } + + if (stream.BitsRemaining() < 15) { + // Not enough room for another frame header. + return false; + } + + uint64_t size = stream.Read(15); + if (size == 0x7FFF) { + // Invalid frame (and last of this packet) + return false; + } else if (size < 16 || (size - 15) > stream.BitsRemaining()) { + // Too short to hold header + trailing bit, or last frame (runs past this packet). + return false; + } + + stream.Advance(size - 16); + + // Read the trailing bit to see if frames follow + if (stream.Read(1) == 0) { + break; + } + } + + return false; +} + void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { SCOPE_profile_cpu_f("apu"); @@ -203,22 +280,16 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { // 32bit header (big endian) // Frames are the smallest thing the SPUs can decode. 
- // They usually can span packets (libav handles this) + // They can and usually will span packets. // Sample rates (data.sample_rate): - // 0 - 24 kHz ? + // 0 - 24 kHz // 1 - 32 kHz - // 2 - 44.1 kHz ? - // 3 - 48 kHz ? + // 2 - 44.1 kHz + // 3 - 48 kHz // SPUs also support stereo decoding. (data.is_stereo) - // Quick die if there's no data. - if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) { - XELOGAPU("Context %d: No valid input buffers!", id()); - return; - } - // Check the output buffer - we cannot decode anything else if it's // unavailable. if (!data->output_buffer_valid) { @@ -236,25 +307,28 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { // Translate pointers for future use. // Sometimes the game will use rolling input buffers. If they do, we cannot // assume they form a complete block! In addition, the buffers DO NOT have - // to be sequential! - // (bit.trip runner 2 does this) - // TODO: Collect partial frames into a buffer if the game uses rolling buffers, - // and present the full frame to libav when we get it. + // to be contiguous! uint8_t* in0 = data->input_buffer_0_valid ? memory()->TranslatePhysical(data->input_buffer_0_ptr) : nullptr; uint8_t* in1 = data->input_buffer_1_valid ? memory()->TranslatePhysical(data->input_buffer_1_ptr) : nullptr; - uint8_t* current_input_buffer = in0; + uint8_t* current_input_buffer = data->current_buffer ? in1 : in0; + + XELOGAPU("Processing context %d (offset %d, buffer %d, ptr %.8X)", id(), + data->input_buffer_read_offset, data->current_buffer, + current_input_buffer); size_t input_buffer_0_size = data->input_buffer_0_packet_count * kBytesPerPacket; size_t input_buffer_1_size = data->input_buffer_1_packet_count * kBytesPerPacket; + size_t input_total_size = input_buffer_0_size + input_buffer_1_size; + size_t current_input_size = data->current_buffer ? 
input_buffer_1_size : input_buffer_0_size; - size_t input_total_size = input_buffer_0_size + input_buffer_1_size; + size_t current_input_packet_count = current_input_size / kBytesPerPacket; // Output buffers are in raw PCM samples, 256 bytes per block. // Output buffer is a ring buffer. We need to write from the write offset @@ -272,14 +346,10 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { output_rb.set_write_offset(output_write_offset); size_t output_remaining_bytes = output_rb.write_count(); + bool output_written = false; // Decode until we can't write any more data. while (output_remaining_bytes > 0) { - if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) { - // Out of data. - break; - } - int num_channels = data->is_stereo ? 2 : 1; // Check if we have part of a frame waiting (and the game hasn't jumped @@ -289,7 +359,10 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { size_t to_write = std::min( output_remaining_bytes, ((size_t)kBytesPerFrame * num_channels - current_frame_pos_)); - output_rb.Write(current_frame_, to_write); + output_rb.Write(current_frame_ + current_frame_pos_, to_write); + output_written = true; + XELOGAPU("XmaContext %d: wrote out %d bytes of left-over samples", id(), + to_write); current_frame_pos_ += to_write; if (current_frame_pos_ >= kBytesPerFrame * num_channels) { @@ -301,48 +374,189 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { continue; } + if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) { + // Out of data. + break; + } + + if (data->input_buffer_read_offset == 0) { + // Invalid offset. Go ahead and set it. + uint32_t offset = xma::GetPacketFrameOffset(current_input_buffer); + if (offset == -1) { + // No more frames. 
+ if (data->current_buffer == 0) { + data->input_buffer_0_valid = 0; + data->input_buffer_read_offset = 0; + data->current_buffer++; + } else if (data->current_buffer == 1) { + data->input_buffer_1_valid = 0; + data->input_buffer_read_offset = 0; + data->current_buffer--; + } + + // Die if we have no partial saved. + if (!partial_frame_saved_) { + return; + } + } else { + data->input_buffer_read_offset = offset; + } + } + + if (!ValidFrameOffset(current_input_buffer, current_input_size, + data->input_buffer_read_offset)) { + XELOGAPU("XmaContext %d: Invalid read offset %d!", id(), + data->input_buffer_read_offset); + if (data->current_buffer == 0) { + data->current_buffer = 1; + data->input_buffer_0_valid = 0; + } else if (data->current_buffer == 1) { + data->current_buffer = 0; + data->input_buffer_1_valid = 0; + } + + data->input_buffer_read_offset = 0; + return; + } + + // Check if we need to save a partial frame. + if (data->input_buffer_read_offset != 0 && !partial_frame_saved_ && + GetFramePacketNumber(current_input_buffer, current_input_size, + data->input_buffer_read_offset) == + current_input_packet_count - 1) { + BitStream stream(current_input_buffer, current_input_size * 8); + stream.SetOffset(data->input_buffer_read_offset); + + if (stream.BitsRemaining() >= 15) { + uint64_t frame_size = stream.Read(15); + if (data->input_buffer_read_offset + frame_size >= + current_input_size * 8 && + frame_size != 0x7FFF) { + uint32_t rel_offset = data->input_buffer_read_offset % (2048 * 8); + + // Frame is cut off! Save and exit. 
+ partial_frame_saved_ = true; + partial_frame_size_known_ = true; + partial_frame_total_size_bits_ = frame_size; + SavePartial( + current_input_buffer + (current_input_packet_count - 1) * 2048, + rel_offset, frame_size, false); + } + } else { + // Header cut in half :/ + uint32_t rel_offset = data->input_buffer_read_offset % (2048 * 8); + + partial_frame_saved_ = true; + partial_frame_size_known_ = false; + SavePartial( + current_input_buffer + (current_input_packet_count - 1) * 2048, + rel_offset, 0, false); + } + + if (partial_frame_saved_) { + XELOGAPU("XmaContext %d: saved a partial frame", id()); + + if (data->current_buffer == 0) { + data->input_buffer_0_valid = 0; + data->input_buffer_read_offset = 0; + data->current_buffer++; + } else if (data->current_buffer == 1) { + data->input_buffer_1_valid = 0; + data->input_buffer_read_offset = 0; + data->current_buffer--; + } + + return; + } + } + + if (partial_frame_saved_ && !partial_frame_size_known_) { + // Append the rest of the header. + size_t offset = SavePartial(current_input_buffer, 32, 15, true); + + // Read the frame size. + BitStream stream(partial_frame_buffer_.data(), + 15 + partial_frame_start_offset_bits_); + stream.SetOffset(partial_frame_start_offset_bits_); + + uint64_t size = stream.Read(15); + partial_frame_size_known_ = true; + partial_frame_total_size_bits_ = size; + + // Now append the rest of the frame. + SavePartial(current_input_buffer, 32 + (uint32_t)offset, size, true); + } else if (partial_frame_saved_) { + // Append the rest of the frame. + SavePartial(current_input_buffer, 32, partial_frame_total_size_bits_, + true); + } + + // Prepare the decoder. Reinitialize if any parameters have changed. 
+ PrepareDecoder(current_input_buffer, current_input_size, data->sample_rate, + num_channels); + + bool partial = false; + size_t bit_offset = data->input_buffer_read_offset; + if (partial_frame_saved_) { + XELOGAPU("XmaContext %d: processing saved partial frame", id()); + packet_->data = partial_frame_buffer_.data(); + packet_->size = (int)partial_frame_buffer_.size(); + + bit_offset = partial_frame_start_offset_bits_; + partial = true; + partial_frame_saved_ = false; + } else { + packet_->data = current_input_buffer; + packet_->size = (int)current_input_size; + } + int invalid_frame = 0; // invalid frame? int got_frame = 0; // successfully decoded a frame? int frame_size = 0; - packet_->data = current_input_buffer; - packet_->size = (int)current_input_size; - PrepareDecoder(in0, current_input_size, data->sample_rate, num_channels); - int len = xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame, - &invalid_frame, &frame_size, 1, - data->input_buffer_read_offset); - if (invalid_frame) { - // Invalid frame/packet: length header is 0x7FFF - // Sometimes there's frames in the middle of the stream flagged as - // invalid. - // Double-check to make sure we're not in the middle. - uint32_t frame_byte_offset = data->input_buffer_read_offset >> 3; - uint32_t packet_number = frame_byte_offset / 2048; - if (packet_number < data->input_buffer_0_packet_count - 1) { - // Okay. Skip to the beginning of the next packet. - packet_number++; - data->input_buffer_read_offset = (packet_number * 2048 * 8) + 32; - continue; - } - - // Last frame of the block. Swap buffers if necessary. - if (data->current_buffer == 0) { - if (data->input_buffer_1_valid) { - data->current_buffer++; - } else { - // End of input. - data->input_buffer_read_offset = input_total_size * 8; + int len = + xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame, + &invalid_frame, &frame_size, !partial, bit_offset); + if (!partial && len == 0) { + // Got the last frame of a packet. 
Advance the read offset to the next + // packet. + uint32_t packet_number = + GetFramePacketNumber(current_input_buffer, current_input_size, + data->input_buffer_read_offset); + if (packet_number == current_input_packet_count - 1) { + // Last packet. + if (data->current_buffer == 0) { + data->input_buffer_0_valid = 0; + data->input_buffer_read_offset = 0; + data->current_buffer = 1; + } else if (data->current_buffer == 1) { + data->input_buffer_1_valid = 0; + data->input_buffer_read_offset = 0; + data->current_buffer = 0; } - - data->input_buffer_0_valid = 0; - return; } else { - // End of input. - data->current_buffer = 0; - data->input_buffer_1_valid = 0; - data->input_buffer_read_offset = input_total_size * 8; - return; + // Advance the read offset. + packet_number++; + uint8_t* packet = current_input_buffer + (packet_number * 2048); + uint32_t first_frame_offset = xma::GetPacketFrameOffset(packet); + if (first_frame_offset == -1) { + // Invalid packet (only contained a frame partial). Out of input. + if (data->current_buffer == 0) { + data->input_buffer_0_valid = 0; + data->current_buffer = 1; + } else if (data->current_buffer == 1) { + data->input_buffer_1_valid = 0; + data->current_buffer = 0; + } + + data->input_buffer_read_offset = 0; + } else { + data->input_buffer_read_offset = + packet_number * 2048 * 8 + first_frame_offset; + } } - } else if (got_frame && len > 0) { + } + + if (got_frame) { // Valid frame. // Check and see if we need to loop back to any spot. if (data->loop_count > 0 && @@ -352,53 +566,28 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { if (data->loop_count < 255) { data->loop_count--; } - } else { + } else if (!partial && len > 0) { data->input_buffer_read_offset += len; - if (data->current_buffer == 0 && - data->input_buffer_read_offset > input_buffer_0_size * 8) { - // Overflow? Setup next buffer. 
- data->current_buffer++; - data->input_buffer_0_valid = 0; - } else if (data->input_buffer_read_offset > input_total_size * 8) { - // Overflow! The game will fix up the read offset. - data->current_buffer = 0; - data->input_buffer_0_valid = 0; - data->input_buffer_1_valid = 0; - } } - } - - if ((len < 0 || !got_frame) && frame_size != 0) { - // Oh no! Skip the frame and hope everything works. - data->input_buffer_read_offset += frame_size; - data->input_buffer_read_offset = (uint32_t)xma2_correct_frame_offset( - in0, input_buffer_0_size, data->input_buffer_read_offset); - - continue; - } else if (len < 0 || !got_frame) { - // Did not get frame and could not get frame size. - data->input_buffer_0_valid = 0; - data->input_buffer_1_valid = 0; + } else if (len < 0) { + // Did not get frame + XELOGAPU("libav failed to decode a frame!"); + if (frame_size && frame_size != 0x7FFF) { + data->input_buffer_read_offset += frame_size; + } else { + data->input_buffer_0_valid = 0; + data->input_buffer_1_valid = 0; + } return; } - // Sometimes we may run up to <15 bits before the next packet. If this - // happens, we need to automatically advance to the next frame. - // We'll ask the XMA2 decoder to do this for us, since it's more qualified. - data->input_buffer_read_offset = (uint32_t)xma2_correct_frame_offset( - in0, input_buffer_0_size, data->input_buffer_read_offset); last_input_read_pos_ = data->input_buffer_read_offset; - if (data->input_buffer_read_offset == 0) { - // Invalid offset. Out of data. - data->input_buffer_0_valid = 0; - data->input_buffer_1_valid = 0; - } - - // Copy to the output buffer. - // Successfully decoded a frame. - size_t written_bytes = 0; if (got_frame) { + // Successfully decoded a frame. + // Copy to the output buffer. + size_t written_bytes = 0; + #ifdef DEBUG // Validity checks. if (decoded_frame_->nb_samples > kSamplesPerFrame) { @@ -419,7 +608,7 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { #endif // Convert the frame. 
- ConvertFrame((const float**)decoded_frame_->data, context_->channels, + ConvertFrame((const uint8_t**)decoded_frame_->data, context_->channels, decoded_frame_->nb_samples, current_frame_); current_frame_pos_ = 0; @@ -435,15 +624,18 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { written_bytes = kBytesPerFrame * num_channels; } - } - output_remaining_bytes -= written_bytes; - data->output_buffer_write_offset = output_rb.write_offset() / 256; + output_written = true; + output_remaining_bytes -= written_bytes; + data->output_buffer_write_offset = output_rb.write_offset() / 256; + } } // The game will kick us again with a new output buffer later. // It's important that we only invalidate this if we actually wrote to it!! - data->output_buffer_valid = 0; + if (output_written) { + data->output_buffer_valid = 0; + } } uint32_t XmaContext::GetFramePacketNumber(uint8_t* block, size_t size, @@ -490,7 +682,7 @@ int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate, return 0; } -bool XmaContext::ConvertFrame(const float** samples, int num_channels, +bool XmaContext::ConvertFrame(const uint8_t** samples, int num_channels, int num_samples, uint8_t* output_buffer) { // Loop through every sample, convert and drop it into the output array. // If more than one channel, we need to interleave the samples from each @@ -500,7 +692,7 @@ bool XmaContext::ConvertFrame(const float** samples, int num_channels, for (int i = 0; i < num_samples; i++) { for (int j = 0; j < num_channels; j++) { // Select the appropriate array based on the current channel. - auto sample_array = samples[j]; + auto sample_array = reinterpret_cast(samples[j]); // Raw sample should be within [-1, 1]. // Clamp it, just in case. @@ -516,216 +708,5 @@ bool XmaContext::ConvertFrame(const float** samples, int num_channels, return true; } -int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) { - // Translate pointers for future use. - uint8_t* in0 = data->input_buffer_0_valid - ? 
memory()->TranslatePhysical(data->input_buffer_0_ptr) - : nullptr; - uint8_t* in1 = data->input_buffer_1_valid - ? memory()->TranslatePhysical(data->input_buffer_1_ptr) - : nullptr; - - int sample_rate = GetSampleRate(data->sample_rate); - int channels = data->is_stereo ? 2 : 1; - - // See if we've finished with the input. - // Block count is in packets, so expand by packet size. - uint32_t input_size_0_bytes = data->input_buffer_0_valid - ? (data->input_buffer_0_packet_count) * 2048 - : 0; - uint32_t input_size_1_bytes = data->input_buffer_1_valid - ? (data->input_buffer_1_packet_count) * 2048 - : 0; - - // Total input size - uint32_t input_size_bytes = input_size_0_bytes + input_size_1_bytes; - - // Calculate the first frame offset we need to decode. - uint32_t frame_offset_bits = (data->input_buffer_read_offset % (2048 * 8)); - - // Input read offset is in bits. Typically starts at 32 (4 bytes). - // "Sequence" offset - used internally for WMA Pro decoder. - // Just the read offset. - // NOTE: Read offset may not be at the first frame in a packet! - uint32_t packet_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8; - if (packet_offset_bytes % 2048 != 0) { - packet_offset_bytes -= packet_offset_bytes % 2048; - } - uint32_t input_remaining_bytes = input_size_bytes - packet_offset_bytes; - - if (packet_offset_bytes >= input_size_bytes) { - // No more data available and no packet prepared. - return -1; - } - - // Setup input offset and input buffer. - uint32_t input_offset_bytes = packet_offset_bytes; - auto input_buffer = in0; - - if (packet_offset_bytes >= input_size_0_bytes && input_size_1_bytes) { - // Size overlap, select input buffer 1. - // TODO(DrChat): This needs testing. - input_offset_bytes -= input_size_0_bytes; - input_buffer = in1; - } - - // Still have data to read. 
- auto packet = input_buffer + input_offset_bytes; - assert_true(input_offset_bytes % 2048 == 0); - PreparePacket(packet, packet_offset_bytes, kBytesPerPacket, sample_rate, - channels); - - data->input_buffer_read_offset += kBytesPerPacket * 8; - - input_remaining_bytes -= kBytesPerPacket; - if (input_remaining_bytes <= 0) { - // Used the last of the data but prepared a packet. - return 0; - } - - return input_remaining_bytes; -} - -int XmaContext::PreparePacket(uint8_t* input, size_t seq_offset, size_t size, - int sample_rate, int channels) { - if (size != kBytesPerPacket) { - // Invalid packet size! - assert_always(); - return 1; - } - if (packet_->size > 0 || current_frame_pos_ != frame_samples_size_) { - // Haven't finished parsing another packet. - return 1; - } - - // Packet metadata is always 1 for XMA2 - assert_true((input[2] & 0x7) == 1); - - packet_->data = input; - packet_->size = (int)size; - - // Re-initialize the context with new sample rate and channels. - if (context_->sample_rate != sample_rate || context_->channels != channels) { - // We have to reopen the codec so it'll realloc whatever data it needs. - // TODO(DrChat): Find a better way. - avcodec_close(context_); - - context_->sample_rate = sample_rate; - context_->channels = channels; - extra_data_.channel_mask = - channels == 2 ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO; - - if (avcodec_open2(context_, codec_, NULL) < 0) { - XELOGE("XmaContext: Failed to reopen libav context"); - return 1; - } - } - - return 0; -} - -void XmaContext::DiscardPacket() { - if (packet_->size > 0 || current_frame_pos_ != frame_samples_size_) { - packet_->data = 0; - packet_->size = 0; - current_frame_pos_ = frame_samples_size_; - } -} - -int XmaContext::DecodePacket(uint8_t* output, size_t output_offset, - size_t output_size, size_t* read_bytes) { - size_t to_copy = 0; - size_t original_offset = output_offset; - if (read_bytes) { - *read_bytes = 0; - } - - // We're holding onto an already-decoded frame. 
Copy it out. - if (current_frame_pos_ != frame_samples_size_) { - to_copy = std::min(output_size, frame_samples_size_ - current_frame_pos_); - memcpy(output + output_offset, current_frame_ + current_frame_pos_, - to_copy); - - current_frame_pos_ += to_copy; - output_size -= to_copy; - output_offset += to_copy; - } - - while (output_size > 0 && packet_->size > 0) { - int got_frame = 0; - - // Decode the current frame. - int len = - avcodec_decode_audio4(context_, decoded_frame_, &got_frame, packet_); - if (len < 0) { - // Error in codec (bad sample rate or something). - return len; - } - - if (read_bytes) { - *read_bytes += len; - } - - // Offset by decoded length. - packet_->size -= len; - packet_->data += len; - packet_->dts = packet_->pts = AV_NOPTS_VALUE; - - // Successfully decoded a frame. - if (got_frame) { - // Validity checks. - if (decoded_frame_->nb_samples > kSamplesPerFrame) { - return -2; - } else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) { - return -3; - } - - // Check the returned buffer size. - if (av_samples_get_buffer_size(NULL, context_->channels, - decoded_frame_->nb_samples, - context_->sample_fmt, 1) != - context_->channels * decoded_frame_->nb_samples * sizeof(float)) { - return -4; - } - - // Loop through every sample, convert and drop it into the output array. - // If more than one channel, the game wants the samples from each channel - // interleaved next to each other. - uint32_t o = 0; - for (int i = 0; i < decoded_frame_->nb_samples; i++) { - for (int j = 0; j < context_->channels; j++) { - // Select the appropriate array based on the current channel. - auto sample_array = reinterpret_cast(decoded_frame_->data[j]); - - // Raw sample should be within [-1, 1]. - // Clamp it, just in case. - float raw_sample = xe::saturate(sample_array[i]); - - // Convert the sample and output it in big endian. 
- float scaled_sample = raw_sample * ((1 << 15) - 1); - int sample = static_cast(scaled_sample); - xe::store_and_swap(¤t_frame_[o++ * 2], - sample & 0xFFFF); - } - } - current_frame_pos_ = 0; - - // Total size of the frame's samples. - // Magic number 2 is sizeof an output sample. - frame_samples_size_ = context_->channels * decoded_frame_->nb_samples * 2; - - to_copy = std::min(output_size, (size_t)(frame_samples_size_)); - std::memcpy(output + output_offset, current_frame_, to_copy); - - current_frame_pos_ += to_copy; - output_size -= to_copy; - output_offset += to_copy; - } - } - - // Return number of bytes written. - return static_cast(output_offset - original_offset); -} - } // namespace apu } // namespace xe diff --git a/src/xenia/apu/xma_context.h b/src/xenia/apu/xma_context.h index a0f769f38..86eca8c1c 100644 --- a/src/xenia/apu/xma_context.h +++ b/src/xenia/apu/xma_context.h @@ -167,12 +167,14 @@ class XmaContext { private: static int GetSampleRate(int id); + size_t SavePartial(uint8_t* packet, uint32_t frame_offset_bits, size_t frame_size_bits, bool append); + bool ValidFrameOffset(uint8_t* block, size_t size_bytes, size_t frame_offset_bits); void DecodePackets(XMA_CONTEXT_DATA* data); uint32_t GetFramePacketNumber(uint8_t* block, size_t size, size_t bit_offset); int PrepareDecoder(uint8_t* block, size_t size, int sample_rate, int channels); - bool ConvertFrame(const float** samples, int num_channels, int num_samples, + bool ConvertFrame(const uint8_t** samples, int num_channels, int num_samples, uint8_t* output_buffer); int StartPacket(XMA_CONTEXT_DATA* data); @@ -199,6 +201,13 @@ class XmaContext { AVPacket* packet_ = nullptr; WmaProExtraData extra_data_; + bool partial_frame_saved_ = false; + bool partial_frame_size_known_ = false; + size_t partial_frame_total_size_bits_ = 0; + size_t partial_frame_start_offset_bits_ = 0; + size_t partial_frame_offset_bits_ = 0; // blah internal don't use this + std::vector partial_frame_buffer_; + // If we didn't 
finish writing a frame to the output buffer, this is the offset. size_t current_frame_pos_ = 0; uint32_t last_input_read_pos_ = 0; // Last seen read buffer pos diff --git a/src/xenia/apu/xma_decoder.cc b/src/xenia/apu/xma_decoder.cc index 766ce231b..aaa0edf90 100644 --- a/src/xenia/apu/xma_decoder.cc +++ b/src/xenia/apu/xma_decoder.cc @@ -82,7 +82,7 @@ void av_log_callback(void* avcl, int level, const char* fmt, va_list va) { StringBuffer buff; buff.AppendVarargs(fmt, va); - xe::LogLineVarargs(level_char, "libav: %s", buff.GetString()); + xe::LogLineFormat(level_char, "libav: %s", buff.GetString()); } X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) { @@ -113,7 +113,7 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) { } registers_.next_context = 1; - //worker_running_ = true; + worker_running_ = true; worker_thread_ = kernel::object_ref( new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() { WorkerThreadMain(); @@ -131,6 +131,11 @@ void XmaDecoder::WorkerThreadMain() { for (uint32_t n = 0; n < kContextCount; n++) { XmaContext& context = contexts_[n]; context.Work(); + + // TODO: Need thread safety to do this. + // Probably not too important though. 
+ //registers_.current_context = n; + //registers_.next_context = (n + 1) % kContextCount; } } } @@ -209,7 +214,6 @@ uint32_t XmaDecoder::ReadRegister(uint32_t addr) { // number registers_.current_context = registers_.next_context; registers_.next_context = (registers_.next_context + 1) % kContextCount; - value = registers_.current_context; } value = xe::byte_swap(value); @@ -240,7 +244,6 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) { uint32_t context_id = base_context_id + i; XmaContext& context = contexts_[context_id]; context.Enable(); - context.Work(); } } diff --git a/src/xenia/apu/xma_helpers.h b/src/xenia/apu/xma_helpers.h index 8efc9d1d1..bf0f593ac 100644 --- a/src/xenia/apu/xma_helpers.h +++ b/src/xenia/apu/xma_helpers.h @@ -25,7 +25,12 @@ uint32_t GetPacketFrameCount(uint8_t* packet) { // Get the first frame offset in bits uint32_t GetPacketFrameOffset(uint8_t* packet) { - return (uint16_t)((packet[0] << 13) | (packet[1] << 5) | (packet[2] >> 3)) + 32; + uint32_t val = (uint16_t)(((packet[0] & 0x3) << 13) | (packet[1] << 5) | (packet[2] >> 3)); + if (val == 0x7FFF) { + return -1; + } else { + return val + 32; + } } uint32_t GetPacketMetadata(uint8_t* packet) { diff --git a/src/xenia/base/bit_stream.cc b/src/xenia/base/bit_stream.cc new file mode 100644 index 000000000..2a073109e --- /dev/null +++ b/src/xenia/base/bit_stream.cc @@ -0,0 +1,143 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/base/bit_stream.h" + +#include + +#include "xenia/base/assert.h" +#include "xenia/base/byte_order.h" + +namespace xe { +BitStream::BitStream(uint8_t* buffer, size_t size_in_bits) + : buffer_(buffer), size_bits_(size_in_bits) {} + +BitStream::~BitStream() {} + +void BitStream::SetOffset(size_t offset_bits) { + assert_false(offset_bits > size_bits_); + offset_bits_ = std::min(offset_bits, size_bits_); +} + +size_t BitStream::BitsRemaining() { return size_bits_ - offset_bits_; } + +uint64_t BitStream::Peek(size_t num_bits) { + // FYI: The reason we can't copy more than 57 bits is: + // 57 = 7 * 8 + 1 - that can only span a maximum of 8 bytes. + // We can't read in 9 bytes (easily), so we limit it. + assert_false(num_bits > 57); + assert_false(offset_bits_ + num_bits > size_bits_); + + size_t offset_bytes = offset_bits_ >> 3; + size_t rel_offset_bits = offset_bits_ - (offset_bytes << 3); + + // offset --> + // ..[junk]..| target bits |....[junk]............. + uint64_t bits = *(uint64_t*)(buffer_ + offset_bytes); + + // The stream is stored most-significant byte first; swap so byte 0 ends up in the top bits. + // TODO: Have a flag specifying endianness of data? + bits = xe::byte_swap(bits); + + // Shift right + // .....[junk]........| target bits | + bits >>= 64 - (rel_offset_bits + num_bits); + + // AND with mask (64-bit literal: num_bits may be as large as 57) + // ...................| target bits | + bits &= (1ull << num_bits) - 1; + + return bits; +} + +uint64_t BitStream::Read(size_t num_bits) { + uint64_t val = Peek(num_bits); + Advance(num_bits); + + return val; +} + +// TODO: This is totally not tested! 
+bool BitStream::Write(uint64_t val, size_t num_bits) { + assert_false(num_bits > 57); + assert_false(offset_bits_ + num_bits > size_bits_); + + size_t offset_bytes = offset_bits_ >> 3; + size_t rel_offset_bits = offset_bits_ - (offset_bytes << 3); + + // Construct a mask (64-bit literal: num_bits may be as large as 57) + uint64_t mask = (1ull << num_bits) - 1; + mask <<= 64 - (rel_offset_bits + num_bits); + mask = ~mask; + + // Shift the value left into position. + val <<= 64 - (rel_offset_bits + num_bits); + + // offset -----> (byte-swapped on load so the bit layout matches Peek) + // ....[junk]...| target bits w/ junk |....[junk]...... + uint64_t bits = xe::byte_swap(*(uint64_t*)(buffer_ + offset_bytes)); + + // AND with mask + // ....[junk]...| target bits (0) |........[junk]...... + bits &= mask; + + // OR with val + // ....[junk]...| target bits (val) |......[junk]...... + bits |= val; + + // Store into the bitstream, swapping back to the stream's byte order. + *(uint64_t*)(buffer_ + offset_bytes) = xe::byte_swap(bits); + + // Advance the bitstream forward. + Advance(num_bits); + + return true; +} + +size_t BitStream::Copy(uint8_t* dest_buffer, size_t num_bits) { + size_t offset_bytes = offset_bits_ >> 3; + size_t rel_offset_bits = offset_bits_ - (offset_bytes << 3); + size_t bits_left = num_bits; + size_t out_offset_bytes = 0; + + // First: Copy the first few bits up to a byte boundary. + if (rel_offset_bits) { + uint64_t bits = Peek(8 - rel_offset_bits); + dest_buffer[out_offset_bytes] |= (uint8_t)bits; + + bits_left -= 8 - rel_offset_bits; + Advance(8 - rel_offset_bits); + out_offset_bytes++; + } + + // Second: Use memcpy for the bytes left. + if (bits_left >= 8) { + std::memcpy(dest_buffer + out_offset_bytes, + buffer_ + offset_bytes + out_offset_bytes, bits_left / 8); + out_offset_bytes += (bits_left / 8); + Advance((bits_left / 8) * 8); + bits_left -= (bits_left / 8) * 8; + } + + // Third: Copy the last few bits. + if (bits_left) { + uint64_t bits = Peek(bits_left); + bits <<= 8 - bits_left; + + dest_buffer[out_offset_bytes] |= (uint8_t)bits; + Advance(bits_left); + } + + // Return the bit offset to the copied bits. 
+ return rel_offset_bits; +} + +void BitStream::Advance(size_t num_bits) { SetOffset(offset_bits_ + num_bits); } + +} // namespace xe \ No newline at end of file diff --git a/src/xenia/base/bit_stream.h b/src/xenia/base/bit_stream.h new file mode 100644 index 000000000..64cc2704b --- /dev/null +++ b/src/xenia/base/bit_stream.h @@ -0,0 +1,44 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_BASE_BIT_STREAM_H_ +#define XENIA_BASE_BIT_STREAM_H_ + +#include + +namespace xe { +class BitStream { + public: + BitStream(uint8_t* buffer, size_t size_in_bits); + ~BitStream(); + + const uint8_t* buffer() const { return buffer_; } + uint8_t* buffer() { return buffer_; } + size_t offset_bits() const { return offset_bits_; } + size_t size_bits() const { return size_bits_; } + + void Advance(size_t num_bits); + void SetOffset(size_t offset_bits); + size_t BitsRemaining(); + + // Note: num_bits MUST be in the range 0-57 (inclusive) + uint64_t Peek(size_t num_bits); + uint64_t Read(size_t num_bits); + bool Write(uint64_t val, size_t num_bits); // TODO: Not tested! + + size_t Copy(uint8_t* dest_buffer, size_t num_bits); + + private: + uint8_t* buffer_ = nullptr; + size_t offset_bits_ = 0; + size_t size_bits_ = 0; +}; +} // namespace xe + +#endif // XENIA_BASE_BIT_STREAM_H_ \ No newline at end of file diff --git a/third_party/libav.lua b/third_party/libav.lua new file mode 100644 index 000000000..3341a696c --- /dev/null +++ b/third_party/libav.lua @@ -0,0 +1,3 @@ +group("third_party") + include("libav/libavcodec/premake5.lua") + include("libav/libavutil/premake5.lua")