More improvements to the XMA decoder (and included some forgotten files)

This commit is contained in:
Dr. Chat 2015-08-24 19:42:27 -05:00
parent 0f9cd8cfb3
commit d8ed66c336
6 changed files with 221 additions and 52 deletions

View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
  Visual Studio debugger visualizers (natvis).
  Each <Type> entry below replaces the debugger's default display string for
  the named C++ type with a one-line summary built from its members.
-->
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
<!-- Shims -->
<!-- xe::apu::XmaContext: show id_, is_allocated_ and is_enabled_ on one line. -->
<Type Name="xe::apu::XmaContext">
<DisplayString>id={id_}, allocated={is_allocated_}, enabled={is_enabled_}</DisplayString>
</Type>
</AutoVisualizer>

View File

@ -215,6 +215,7 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
// Quick die if there's no data. // Quick die if there's no data.
if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) { if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
XELOGAPU("Context %d: No valid input buffers!", id());
return; return;
} }
@ -224,25 +225,36 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
return; return;
} }
assert_zero(data->unk_dword_9);
// XAudio Loops // XAudio Loops
// loop_count: // loop_count:
// - XAUDIO2_MAX_LOOP_COUNT = 254 // - XAUDIO2_MAX_LOOP_COUNT = 254
// - XAUDIO2_LOOP_INFINITE = 255 // - XAUDIO2_LOOP_INFINITE = 255
// loop_start/loop_end are bit offsets to a specific frame // loop_start/loop_end are bit offsets to a specific frame
//assert_true(data->loop_count == 0);
// Translate pointers for future use. // Translate pointers for future use.
// Sometimes the game will use rolling input buffers. If they do, we cannot
// assume they form a complete block! In addition, the buffers DO NOT have
// to be sequential!
// (bit.trip runner 2 does this)
// TODO: Collect partial frames into a buffer if the game uses rolling buffers,
// and present the full frame to libav when we get it.
uint8_t* in0 = data->input_buffer_0_valid uint8_t* in0 = data->input_buffer_0_valid
? memory()->TranslatePhysical(data->input_buffer_0_ptr) ? memory()->TranslatePhysical(data->input_buffer_0_ptr)
: nullptr; : nullptr;
uint8_t* in1 = data->input_buffer_1_valid uint8_t* in1 = data->input_buffer_1_valid
? memory()->TranslatePhysical(data->input_buffer_1_ptr) ? memory()->TranslatePhysical(data->input_buffer_1_ptr)
: nullptr; : nullptr;
uint8_t* current_input_buffer = in0;
size_t input_buffer_0_size = size_t input_buffer_0_size =
data->input_buffer_0_packet_count * kBytesPerPacket; data->input_buffer_0_packet_count * kBytesPerPacket;
size_t input_buffer_1_size = size_t input_buffer_1_size =
data->input_buffer_1_packet_count * kBytesPerPacket; data->input_buffer_1_packet_count * kBytesPerPacket;
size_t current_input_size =
data->current_buffer ? input_buffer_1_size : input_buffer_0_size;
size_t input_total_size = input_buffer_0_size + input_buffer_1_size;
// Output buffers are in raw PCM samples, 256 bytes per block. // Output buffers are in raw PCM samples, 256 bytes per block.
// Output buffer is a ring buffer. We need to write from the write offset // Output buffer is a ring buffer. We need to write from the write offset
@ -289,50 +301,111 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
continue; continue;
} }
int block_last_frame = 0; // last frame in block? int invalid_frame = 0; // invalid frame?
int got_frame = 0; // successfully decoded a frame? int got_frame = 0; // successfully decoded a frame?
int frame_size = 0; int frame_size = 0;
packet_->data = in0; packet_->data = current_input_buffer;
packet_->size = data->input_buffer_0_packet_count * 2048; packet_->size = (int)current_input_size;
PrepareDecoder(in0, data->input_buffer_0_packet_count * 2048, PrepareDecoder(in0, current_input_size, data->sample_rate, num_channels);
data->sample_rate, num_channels);
int len = xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame, int len = xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame,
&block_last_frame, &frame_size, &invalid_frame, &frame_size, 1,
data->input_buffer_read_offset); data->input_buffer_read_offset);
if (block_last_frame) { if (invalid_frame) {
data->input_buffer_0_valid = 0; // Invalid frame/packet: length header is 0x7FFF
data->input_buffer_1_valid = 0; // Sometimes there's frames in the middle of the stream flagged as
data->output_buffer_valid = 0; // invalid.
// Double-check to make sure we're not in the middle.
uint32_t frame_byte_offset = data->input_buffer_read_offset >> 3;
uint32_t packet_number = frame_byte_offset / 2048;
if (packet_number < data->input_buffer_0_packet_count - 1) {
// Okay. Skip to the beginning of the next packet.
packet_number++;
data->input_buffer_read_offset = (packet_number * 2048 * 8) + 32;
continue; continue;
} }
if (len == AVERROR_EOF) { // Last frame of the block. Swap buffers if necessary.
// Screw this gtfo if (data->current_buffer == 0) {
if (data->input_buffer_1_valid) {
data->current_buffer++;
} else {
// End of input.
data->input_buffer_read_offset = input_total_size * 8;
}
data->input_buffer_0_valid = 0;
return;
} else {
// End of input.
data->current_buffer = 0;
data->input_buffer_1_valid = 0;
data->input_buffer_read_offset = input_total_size * 8;
return;
}
} else if (got_frame && len > 0) {
// Valid frame.
// Check and see if we need to loop back to any spot.
if (data->loop_count > 0 &&
data->input_buffer_read_offset == data->loop_end) {
// Loop back to the beginning.
data->input_buffer_read_offset = data->loop_start;
if (data->loop_count < 255) {
data->loop_count--;
}
} else {
data->input_buffer_read_offset += len;
if (data->current_buffer == 0 &&
data->input_buffer_read_offset > input_buffer_0_size * 8) {
// Overflow? Setup next buffer.
data->current_buffer++;
data->input_buffer_0_valid = 0;
} else if (data->input_buffer_read_offset > input_total_size * 8) {
// Overflow! The game will fix up the read offset.
data->current_buffer = 0;
data->input_buffer_0_valid = 0; data->input_buffer_0_valid = 0;
data->input_buffer_1_valid = 0; data->input_buffer_1_valid = 0;
data->output_buffer_valid = 0; }
}
}
if ((len < 0 || !got_frame) && frame_size != 0) {
// Oh no! Skip the frame and hope everything works.
data->input_buffer_read_offset += frame_size;
data->input_buffer_read_offset = (uint32_t)xma2_correct_frame_offset(
in0, input_buffer_0_size, data->input_buffer_read_offset);
continue; continue;
} else if (len < 0 || !got_frame) { } else if (len < 0 || !got_frame) {
// Oh no! Skip the frame and hope everything works. // Did not get frame and could not get frame size.
data->input_buffer_read_offset += frame_size; data->input_buffer_0_valid = 0;
data->input_buffer_1_valid = 0;
continue; return;
} }
XELOGD("LEN: %d (%x)", len, len); // Sometimes we may run up to <15 bits before the next packet. If this
// happens, we need to automatically advance to the next frame.
data->input_buffer_read_offset += len; // We'll ask the XMA2 decoder to do this for us, since it's more qualified.
data->input_buffer_read_offset = (uint32_t)xma2_correct_frame_offset(
in0, input_buffer_0_size, data->input_buffer_read_offset);
last_input_read_pos_ = data->input_buffer_read_offset; last_input_read_pos_ = data->input_buffer_read_offset;
if (data->input_buffer_read_offset == 0) {
// Invalid offset. Out of data.
data->input_buffer_0_valid = 0;
data->input_buffer_1_valid = 0;
}
// Copy to the output buffer. // Copy to the output buffer.
// Successfully decoded a frame. // Successfully decoded a frame.
size_t written_bytes = 0; size_t written_bytes = 0;
if (got_frame) { if (got_frame) {
#ifdef DEBUG
// Validity checks. // Validity checks.
if (decoded_frame_->nb_samples > kSamplesPerFrame) { if (decoded_frame_->nb_samples > kSamplesPerFrame) {
XELOGAPU("Decoded frame has an invalid sample count!");
return; return;
} else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) { } else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) {
XELOGAPU("libav decoder did not output floating point samples!");
return; return;
} }
@ -343,27 +416,11 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
context_->channels * decoded_frame_->nb_samples * sizeof(float)) { context_->channels * decoded_frame_->nb_samples * sizeof(float)) {
return; return;
} }
#endif
// Loop through every sample, convert and drop it into the output array. // Convert the frame.
// If more than one channel, the game wants the samples from each channel ConvertFrame((const float**)decoded_frame_->data, context_->channels,
// interleaved next to each other. decoded_frame_->nb_samples, current_frame_);
uint32_t o = 0;
for (int i = 0; i < decoded_frame_->nb_samples; i++) {
for (int j = 0; j < context_->channels; j++) {
// Select the appropriate array based on the current channel.
auto sample_array = reinterpret_cast<float*>(decoded_frame_->data[j]);
// Raw sample should be within [-1, 1].
// Clamp it, just in case.
float raw_sample = xe::saturate(sample_array[i]);
// Convert the sample and output it in big endian.
float scaled_sample = raw_sample * ((1 << 15) - 1);
int sample = static_cast<int>(scaled_sample);
xe::store_and_swap<uint16_t>(&current_frame_[o++ * 2],
sample & 0xFFFF);
}
}
current_frame_pos_ = 0; current_frame_pos_ = 0;
if (output_remaining_bytes < kBytesPerFrame * num_channels) { if (output_remaining_bytes < kBytesPerFrame * num_channels) {
@ -385,6 +442,7 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
} }
// The game will kick us again with a new output buffer later. // The game will kick us again with a new output buffer later.
// It's important that we only invalidate this if we actually wrote to it!!
data->output_buffer_valid = 0; data->output_buffer_valid = 0;
} }
@ -405,8 +463,8 @@ uint32_t XmaContext::GetFramePacketNumber(uint8_t* block, size_t size,
int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate, int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
int channels) { int channels) {
// Sanity check: Packet metadata is always 1 for XMA2 // Sanity check: Packet metadata is always 1 for XMA2/0 for XMA
assert_true((block[2] & 0x7) == 1); assert_true((block[2] & 0x7) == 1 || (block[2] & 0x7) == 0);
sample_rate = GetSampleRate(sample_rate); sample_rate = GetSampleRate(sample_rate);
@ -432,6 +490,32 @@ int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
return 0; return 0;
} }
// Converts one decoded frame of planar float samples into interleaved 16-bit
// samples written to |output_buffer|.
//
// samples:       one float array per channel (samples[channel][index]).
// num_channels:  number of channel arrays in |samples|.
// num_samples:   number of samples to read from each channel array.
// output_buffer: destination; receives num_samples * num_channels 16-bit
//                values, channels interleaved sample by sample.
//
// Always returns true.
bool XmaContext::ConvertFrame(const float** samples, int num_channels,
                              int num_samples, uint8_t* output_buffer) {
  // TODO: This can definitely be optimized with AVX/SSE intrinsics!
  const float kSampleScale = static_cast<float>((1 << 15) - 1);

  // Walk the output linearly; every converted sample takes two bytes.
  uint8_t* out = output_buffer;
  for (int sample_index = 0; sample_index < num_samples; ++sample_index) {
    // Emit this sample position for every channel before moving on, so the
    // channels end up interleaved next to each other.
    for (int channel = 0; channel < num_channels; ++channel) {
      // Raw samples should already be within [-1, 1]; clamp just in case.
      const float clamped = xe::saturate(samples[channel][sample_index]);

      // Scale to the 16-bit range and store it byte-swapped (big endian).
      const int scaled = static_cast<int>(clamped * kSampleScale);
      xe::store_and_swap<uint16_t>(out, scaled & 0xFFFF);
      out += sizeof(uint16_t);
    }
  }

  return true;
}
int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) { int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
// Translate pointers for future use. // Translate pointers for future use.
uint8_t* in0 = data->input_buffer_0_valid uint8_t* in0 = data->input_buffer_0_valid

View File

@ -77,10 +77,12 @@ struct XMA_CONTEXT_DATA {
// DWORD 3 // DWORD 3
uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset
// frame offset in bits
uint32_t unk_dword_3 : 6; // ? ParserErrorStatus/ParserErrorSet(?) uint32_t unk_dword_3 : 6; // ? ParserErrorStatus/ParserErrorSet(?)
// DWORD 4 // DWORD 4
uint32_t loop_end : 26; // XMASetLoopData LoopEndOffset uint32_t loop_end : 26; // XMASetLoopData LoopEndOffset
// frame offset in bits
uint32_t packet_metadata : 5; // XMAGetPacketMetadata uint32_t packet_metadata : 5; // XMAGetPacketMetadata
uint32_t current_buffer : 1; // ? uint32_t current_buffer : 1; // ?
@ -170,6 +172,9 @@ class XmaContext {
int PrepareDecoder(uint8_t* block, size_t size, int sample_rate, int PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
int channels); int channels);
bool ConvertFrame(const float** samples, int num_channels, int num_samples,
uint8_t* output_buffer);
int StartPacket(XMA_CONTEXT_DATA* data); int StartPacket(XMA_CONTEXT_DATA* data);
int PreparePacket(uint8_t* input, size_t seq_offset, size_t size, int PreparePacket(uint8_t* input, size_t seq_offset, size_t size,

View File

@ -55,7 +55,34 @@ XmaDecoder::XmaDecoder(cpu::Processor* processor)
XmaDecoder::~XmaDecoder() = default; XmaDecoder::~XmaDecoder() = default;
void av_log_callback(void* avcl, int level, const char* fmt, va_list va) { void av_log_callback(void* avcl, int level, const char* fmt, va_list va) {
xe::LogLineVarargs('A', fmt, va); #ifdef NDEBUG
if (level > AV_LOG_WARNING) {
return;
}
#endif
char level_char = '?';
switch (level) {
case AV_LOG_ERROR:
level_char = '!';
break;
case AV_LOG_WARNING:
level_char = 'w';
break;
case AV_LOG_INFO:
level_char = 'i';
break;
case AV_LOG_VERBOSE:
level_char = 'v';
break;
case AV_LOG_DEBUG:
level_char = 'd';
break;
}
StringBuffer buff;
buff.AppendVarargs(fmt, va);
xe::LogLineVarargs(level_char, "libav: %s", buff.GetString());
} }
X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) { X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
@ -86,7 +113,7 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
} }
registers_.next_context = 1; registers_.next_context = 1;
worker_running_ = true; //worker_running_ = true;
worker_thread_ = kernel::object_ref<kernel::XHostThread>( worker_thread_ = kernel::object_ref<kernel::XHostThread>(
new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() { new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() {
WorkerThreadMain(); WorkerThreadMain();
@ -213,6 +240,7 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) {
uint32_t context_id = base_context_id + i; uint32_t context_id = base_context_id + i;
XmaContext& context = contexts_[context_id]; XmaContext& context = contexts_[context_id];
context.Enable(); context.Enable();
context.Work();
} }
} }

View File

@ -0,0 +1,43 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
// This file contains some functions used to help parse XMA data.
#ifndef XENIA_APU_XMA_HELPERS_H_
#define XENIA_APU_XMA_HELPERS_H_
#include <stdint.h>
namespace xe {
namespace apu {
namespace xma {
// Get number of frames that /begin/ in this packet.
// The count is stored in the upper 6 bits of the first header byte.
// Declared inline because this definition lives in a header; without it every
// translation unit including the header would emit a conflicting definition.
inline uint32_t GetPacketFrameCount(const uint8_t* packet) {
  return static_cast<uint32_t>(packet[0] >> 2);
}
// Get the first frame offset in bits.
// The offset is a 15-bit field spanning the low 2 bits of byte 0, all of
// byte 1 and the upper 5 bits of byte 2. The upper 6 bits of byte 0 hold the
// frame count and must be masked off, otherwise the lowest frame-count bit
// leaks into bit 15 of the result.
// 32 is added to the stored value before it is returned.
// Declared inline because this definition lives in a header.
inline uint32_t GetPacketFrameOffset(const uint8_t* packet) {
  const uint32_t offset = (static_cast<uint32_t>(packet[0] & 0x3) << 13) |
                          (static_cast<uint32_t>(packet[1]) << 5) |
                          (static_cast<uint32_t>(packet[2]) >> 3);
  return offset + 32;
}
// Get the packet metadata field: the low 3 bits of the third header byte.
// Declared inline because this definition lives in a header; without it every
// translation unit including the header would emit a conflicting definition.
inline uint32_t GetPacketMetadata(const uint8_t* packet) {
  return static_cast<uint32_t>(packet[2] & 0x7);
}
// Get the packet skip count: the fourth header byte.
// Declared inline because this definition lives in a header; without it every
// translation unit including the header would emit a conflicting definition.
inline uint32_t GetPacketSkipCount(const uint8_t* packet) {
  return static_cast<uint32_t>(packet[3]);
}
} // namespace xma
} // namespace apu
} // namespace xe
#endif // XENIA_APU_XMA_HELPERS_H_

2
third_party/libav vendored

@ -1 +1 @@
Subproject commit 4752bdcdbaf61afc933e667a016bca4eb389ac21 Subproject commit 8be22f03d7e3c1663a66cc09375f840a7fc9a365