New WIP audio decoder

This commit is contained in:
Dr. Chat 2015-08-22 11:11:57 -05:00
parent f2b2a22687
commit 0f9cd8cfb3
5 changed files with 245 additions and 116 deletions

3
.gitmodules vendored
View File

@ -19,3 +19,6 @@
[submodule "build_tools"]
path = build_tools
url = https://github.com/xenia-project/build-tools.git
[submodule "third_party/libav"]
path = third_party/libav
url = https://github.com/xenia-project/libav.git

View File

@ -13,13 +13,17 @@
#include <cstring>
#include "xenia/apu/xma_decoder.h"
#include "xenia/apu/xma_helpers.h"
#include "xenia/base/logging.h"
#include "xenia/base/ring_buffer.h"
#include "xenia/profiling.h"
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavcodec/xma2dec.h"
#include "libavutil/channel_layout.h"
extern AVCodec ff_xma2_decoder;
} // extern "C"
// Credit for most of this code goes to:
@ -50,14 +54,8 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
memory_ = memory;
guest_ptr_ = guest_ptr;
static bool avcodec_initialized = false;
if (!avcodec_initialized) {
avcodec_register_all();
avcodec_initialized = true;
}
// Allocate important stuff.
codec_ = avcodec_find_decoder(AV_CODEC_ID_WMAPRO);
codec_ = &ff_xma2_decoder;
if (!codec_) {
return 1;
}
@ -91,7 +89,7 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
// Current frame stuff whatever
// samples per frame * 2 max channels * output bytes
current_frame_ = new uint8_t[kSamplesPerFrame * 2 * 2];
current_frame_ = new uint8_t[kSamplesPerFrame * kBytesPerSample * 2];
current_frame_pos_ = 0;
frame_samples_size_ = 0;
@ -119,11 +117,10 @@ void XmaContext::Enable() {
auto context_ptr = memory()->TranslateVirtual(guest_ptr());
XMA_CONTEXT_DATA data(context_ptr);
XELOGAPU(
"XmaContext: kicking context %d (%d/%d bytes)", id(),
(data.input_buffer_read_offset & ~0x7FF) / 8,
(data.input_buffer_0_packet_count + data.input_buffer_1_packet_count) *
kBytesPerPacket);
XELOGAPU("XmaContext: kicking context %d (%d/%d bits)", id(),
data.input_buffer_read_offset, (data.input_buffer_0_packet_count +
data.input_buffer_1_packet_count) *
kBytesPerPacket * 8);
data.Store(context_ptr);
@ -227,6 +224,26 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
return;
}
// XAudio Loops
// loop_count:
// - XAUDIO2_MAX_LOOP_COUNT = 254
// - XAUDIO2_LOOP_INFINITE = 255
// loop_start/loop_end are bit offsets to a specific frame
//assert_true(data->loop_count == 0);
// Translate pointers for future use.
uint8_t* in0 = data->input_buffer_0_valid
? memory()->TranslatePhysical(data->input_buffer_0_ptr)
: nullptr;
uint8_t* in1 = data->input_buffer_1_valid
? memory()->TranslatePhysical(data->input_buffer_1_ptr)
: nullptr;
size_t input_buffer_0_size =
data->input_buffer_0_packet_count * kBytesPerPacket;
size_t input_buffer_1_size =
data->input_buffer_1_packet_count * kBytesPerPacket;
// Output buffers are in raw PCM samples, 256 bytes per block.
// Output buffer is a ring buffer. We need to write from the write offset
// to the read offset.
@ -246,76 +263,175 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
// Decode until we can't write any more data.
while (output_remaining_bytes > 0) {
// This'll copy audio samples into the output buffer.
// The samples need to be 2 bytes long!
// Copies one frame at a time, so keep calling this until size == 0.
int written_bytes = 0;
int decode_attempts_remaining = 3;
uint8_t work_buffer[kOutputMaxSizeBytes];
while (decode_attempts_remaining) {
size_t read_bytes = 0;
written_bytes =
DecodePacket(work_buffer, 0, output_remaining_bytes, &read_bytes);
if (written_bytes >= 0) {
// assert_true((written_bytes % 256) == 0);
auto written_bytes_rb = output_rb.Write(work_buffer, written_bytes);
assert_true(written_bytes == written_bytes_rb);
// Ok.
break;
} else if (read_bytes % 2048 == 0) {
// Sometimes the decoder will fail on a packet. I think it's
// looking for cross-packet frames and failing. If you run it again
// on the same packet it'll work though.
--decode_attempts_remaining;
} else {
// Failed in the middle of a packet, do not retry!
decode_attempts_remaining = 0;
break;
}
}
if (!decode_attempts_remaining) {
XELOGAPU("XmaContext: libav failed to decode packet (returned %.8X)",
-written_bytes);
// Failed out.
if (data->input_buffer_0_valid || data->input_buffer_1_valid) {
// There's new data available - maybe we'll be ok if we decode it?
written_bytes = 0;
DiscardPacket();
} else {
// No data and hosed - bail.
break;
}
}
data->output_buffer_write_offset = output_rb.write_offset() / 256;
output_remaining_bytes -= written_bytes;
// If we need more data and the input buffers have it, grab it.
if (written_bytes) {
// Haven't finished with current packet.
continue;
} else if (data->input_buffer_0_valid || data->input_buffer_1_valid) {
// Done with previous packet, so grab a new one.
int ret = StartPacket(data);
if (ret <= 0) {
// No more data (but may have prepared a packet)
data->input_buffer_0_valid = 0;
data->input_buffer_1_valid = 0;
}
} else {
// Decoder is out of data and there's no more to give.
if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
// Out of data.
break;
}
int num_channels = data->is_stereo ? 2 : 1;
// Check if we have part of a frame waiting (and the game hasn't jumped
// around)
if (current_frame_pos_ &&
last_input_read_pos_ == data->input_buffer_read_offset) {
size_t to_write = std::min(
output_remaining_bytes,
((size_t)kBytesPerFrame * num_channels - current_frame_pos_));
output_rb.Write(current_frame_, to_write);
current_frame_pos_ += to_write;
if (current_frame_pos_ >= kBytesPerFrame * num_channels) {
current_frame_pos_ = 0;
}
data->output_buffer_write_offset = output_rb.write_offset() / 256;
output_remaining_bytes -= to_write;
continue;
}
int block_last_frame = 0; // last frame in block?
int got_frame = 0; // successfully decoded a frame?
int frame_size = 0;
packet_->data = in0;
packet_->size = data->input_buffer_0_packet_count * 2048;
PrepareDecoder(in0, data->input_buffer_0_packet_count * 2048,
data->sample_rate, num_channels);
int len = xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame,
&block_last_frame, &frame_size,
data->input_buffer_read_offset);
if (block_last_frame) {
data->input_buffer_0_valid = 0;
data->input_buffer_1_valid = 0;
data->output_buffer_valid = 0;
continue;
}
if (len == AVERROR_EOF) {
// Screw this gtfo
data->input_buffer_0_valid = 0;
data->input_buffer_1_valid = 0;
data->output_buffer_valid = 0;
continue;
} else if (len < 0 || !got_frame) {
// Oh no! Skip the frame and hope everything works.
data->input_buffer_read_offset += frame_size;
continue;
}
XELOGD("LEN: %d (%x)", len, len);
data->input_buffer_read_offset += len;
last_input_read_pos_ = data->input_buffer_read_offset;
// Copy to the output buffer.
// Successfully decoded a frame.
size_t written_bytes = 0;
if (got_frame) {
// Validity checks.
if (decoded_frame_->nb_samples > kSamplesPerFrame) {
return;
} else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) {
return;
}
// Check the returned buffer size.
if (av_samples_get_buffer_size(NULL, context_->channels,
decoded_frame_->nb_samples,
context_->sample_fmt, 1) !=
context_->channels * decoded_frame_->nb_samples * sizeof(float)) {
return;
}
// Loop through every sample, convert and drop it into the output array.
// If more than one channel, the game wants the samples from each channel
// interleaved next to each other.
uint32_t o = 0;
for (int i = 0; i < decoded_frame_->nb_samples; i++) {
for (int j = 0; j < context_->channels; j++) {
// Select the appropriate array based on the current channel.
auto sample_array = reinterpret_cast<float*>(decoded_frame_->data[j]);
// Raw sample should be within [-1, 1].
// Clamp it, just in case.
float raw_sample = xe::saturate(sample_array[i]);
// Convert the sample and output it in big endian.
float scaled_sample = raw_sample * ((1 << 15) - 1);
int sample = static_cast<int>(scaled_sample);
xe::store_and_swap<uint16_t>(&current_frame_[o++ * 2],
sample & 0xFFFF);
}
}
current_frame_pos_ = 0;
if (output_remaining_bytes < kBytesPerFrame * num_channels) {
// Output buffer isn't big enough to store the entire frame! Write out a
// part of it.
current_frame_pos_ = output_remaining_bytes;
output_rb.Write(current_frame_, output_remaining_bytes);
written_bytes = output_remaining_bytes;
} else {
output_rb.Write(current_frame_, kBytesPerFrame * num_channels);
written_bytes = kBytesPerFrame * num_channels;
}
}
output_remaining_bytes -= written_bytes;
data->output_buffer_write_offset = output_rb.write_offset() / 256;
}
// The game will kick us again with a new output buffer later.
data->output_buffer_valid = 0;
}
// Maps a bit offset inside an input block to the index of the packet that
// contains it.
//
//   block      - start of the input block (not read by this function).
//   size       - size of the block, in bytes.
//   bit_offset - position inside the block, in bits.
//
// Returns the packet index. If bit_offset lies at or past the end of the
// block, assert_always() is hit and -1 converted to uint32_t is returned.
uint32_t XmaContext::GetFramePacketNumber(uint8_t* block, size_t size,
                                          size_t bit_offset) {
  const size_t size_bits = size * 8;
  if (bit_offset < size_bits) {
    // Bits -> containing byte -> containing packet.
    const size_t containing_byte = bit_offset / 8;
    return static_cast<uint32_t>(containing_byte / kBytesPerPacket);
  }

  // Not good :( - the offset points outside of the block.
  assert_always();
  return static_cast<uint32_t>(-1);
}
// Gets the libav codec context ready to decode data from |block|.
//
//   block       - start of the input data; byte 2 is inspected by the sanity
//                 assert below.
//   size        - size of |block| in bytes (not consulted here).
//   sample_rate - passed through GetSampleRate() before being compared with
//                 and stored into the codec context (presumably a sample rate
//                 id rather than a rate in Hz - TODO confirm).
//   channels    - channel count to configure on the codec context.
//
// Returns 0 on success, or 1 if the codec context could not be reopened after
// a sample rate / channel count change.
int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
                               int channels) {
  // Sanity check: Packet metadata is always 1 for XMA2
  // NOTE(review): |block| is read here without a null or length check -
  // confirm that callers never pass nullptr or fewer than 3 bytes.
  assert_true((block[2] & 0x7) == 1);

  const int converted_rate = GetSampleRate(sample_rate);

  const bool format_unchanged = context_->sample_rate == converted_rate &&
                                context_->channels == channels;
  if (!format_unchanged) {
    // The codec is closed and reopened so that it picks up the new
    // configuration and reallocates whatever data it needs.
    // TODO(DrChat): Find a better way.
    avcodec_close(context_);

    context_->sample_rate = converted_rate;
    context_->channels = channels;
    if (channels == 2) {
      extra_data_.channel_mask = AV_CH_LAYOUT_STEREO;
    } else {
      extra_data_.channel_mask = AV_CH_LAYOUT_MONO;
    }

    if (avcodec_open2(context_, codec_, nullptr) < 0) {
      XELOGE("XmaContext: Failed to reopen libav context");
      return 1;
    }
  }

  // Release whatever the decoded frame still references before it is reused.
  av_frame_unref(decoded_frame_);
  return 0;
}
int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
// Translate pointers for future use.
uint8_t* in0 = data->input_buffer_0_valid
@ -340,41 +456,49 @@ int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
// Total input size
uint32_t input_size_bytes = input_size_0_bytes + input_size_1_bytes;
// Calculate the first frame offset we need to decode.
uint32_t frame_offset_bits = (data->input_buffer_read_offset % (2048 * 8));
// Input read offset is in bits. Typically starts at 32 (4 bytes).
// "Sequence" offset - used internally for WMA Pro decoder.
// Just the read offset.
uint32_t seq_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8;
uint32_t input_remaining_bytes = input_size_bytes - seq_offset_bytes;
// NOTE: Read offset may not be at the first frame in a packet!
uint32_t packet_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8;
if (packet_offset_bytes % 2048 != 0) {
packet_offset_bytes -= packet_offset_bytes % 2048;
}
uint32_t input_remaining_bytes = input_size_bytes - packet_offset_bytes;
if (seq_offset_bytes < input_size_bytes) {
// Setup input offset and input buffer.
uint32_t input_offset_bytes = seq_offset_bytes;
auto input_buffer = in0;
if (seq_offset_bytes >= input_size_0_bytes && input_size_1_bytes) {
// Size overlap, select input buffer 1.
// TODO(DrChat): This needs testing.
input_offset_bytes -= input_size_0_bytes;
input_buffer = in1;
}
// Still have data to read.
auto packet = input_buffer + input_offset_bytes;
assert_true(input_offset_bytes % 2048 == 0);
PreparePacket(packet, seq_offset_bytes, kBytesPerPacket, sample_rate,
channels);
data->input_buffer_read_offset += kBytesPerPacket * 8;
input_remaining_bytes -= kBytesPerPacket;
if (input_remaining_bytes <= 0) {
// Used the last of the data but prepared a packet.
return 0;
}
} else {
if (packet_offset_bytes >= input_size_bytes) {
// No more data available and no packet prepared.
return -1;
}
// Setup input offset and input buffer.
uint32_t input_offset_bytes = packet_offset_bytes;
auto input_buffer = in0;
if (packet_offset_bytes >= input_size_0_bytes && input_size_1_bytes) {
// Size overlap, select input buffer 1.
// TODO(DrChat): This needs testing.
input_offset_bytes -= input_size_0_bytes;
input_buffer = in1;
}
// Still have data to read.
auto packet = input_buffer + input_offset_bytes;
assert_true(input_offset_bytes % 2048 == 0);
PreparePacket(packet, packet_offset_bytes, kBytesPerPacket, sample_rate,
channels);
data->input_buffer_read_offset += kBytesPerPacket * 8;
input_remaining_bytes -= kBytesPerPacket;
if (input_remaining_bytes <= 0) {
// Used the last of the data but prepared a packet.
return 0;
}
return input_remaining_bytes;
}
@ -390,15 +514,11 @@ int XmaContext::PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
return 1;
}
std::memcpy(packet_data_, input, size);
// Packet metadata is always 1 for XMA2
assert_true((input[2] & 0x7) == 1);
// Modify the packet header so it's WMAPro compatible.
auto int_packet_data = reinterpret_cast<int*>(packet_data_);
*int_packet_data =
(((seq_offset & 0x7800) | 0x400) >> 7) | (*int_packet_data & 0xFFFEFF08);
packet_->data = packet_data_;
packet_->size = kBytesPerPacket;
packet_->data = input;
packet_->size = (int)size;
// Re-initialize the context with new sample rate and channels.
if (context_->sample_rate != sample_rate || context_->channels != channels) {

View File

@ -60,8 +60,8 @@ struct XMA_CONTEXT_DATA {
uint32_t input_buffer_1_packet_count : 12; // XMASetInputBuffer1, number of
// 2KB packets. Max 4095 packets.
// These packets form a block.
uint32_t loop_subframe_end : 2; // +12bit, XMASetLoopData
uint32_t unk_dword_1_a : 3; // ? might be loop_subframe_skip
uint32_t loop_subframe_start : 2; // +12bit, XMASetLoopData
uint32_t loop_subframe_end : 3; // +14bit, XMASetLoopData
uint32_t loop_subframe_skip : 3; // +17bit, XMASetLoopData might be
// subframe_decode_count
uint32_t subframe_decode_count : 4; // +20bit might be subframe_skip_count
@ -91,7 +91,7 @@ struct XMA_CONTEXT_DATA {
// DWORD 7
uint32_t output_buffer_ptr; // physical address
// DWORD 8
uint32_t overlap_add_ptr; // PtrOverlapAdd(?)
uint32_t work_buffer_ptr; // PtrOverlapAdd(?)
// DWORD 9
// +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead
@ -133,6 +133,7 @@ class XmaContext {
static const uint32_t kBytesPerSample = 2;
static const uint32_t kSamplesPerFrame = 512;
static const uint32_t kSamplesPerSubframe = 128;
static const uint32_t kBytesPerFrame = kSamplesPerFrame * kBytesPerSample;
static const uint32_t kBytesPerSubframe =
kSamplesPerSubframe * kBytesPerSample;
@ -165,6 +166,10 @@ class XmaContext {
static int GetSampleRate(int id);
void DecodePackets(XMA_CONTEXT_DATA* data);
uint32_t GetFramePacketNumber(uint8_t* block, size_t size, size_t bit_offset);
int PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
int channels);
int StartPacket(XMA_CONTEXT_DATA* data);
int PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
@ -189,11 +194,11 @@ class XmaContext {
AVPacket* packet_ = nullptr;
WmaProExtraData extra_data_;
// If we didn't finish writing a frame to the output buffer, this is the offset.
size_t current_frame_pos_ = 0;
uint32_t last_input_read_pos_ = 0; // Last seen read buffer pos
uint8_t* current_frame_ = nullptr;
uint32_t frame_samples_size_ = 0;
uint8_t packet_data_[kBytesPerPacket];
};
} // namespace apu

View File

@ -171,7 +171,7 @@ SHIM_CALL XMASetLoopData_shim(PPCContext* ppc_context,
context.loop_end = loop_data->loop_end;
context.loop_count = loop_data->loop_count;
context.loop_subframe_end = loop_data->loop_subframe_end;
context.loop_subframe_skip = loop_data->loop_subframe_end;
context.loop_subframe_skip = loop_data->loop_subframe_skip;
context.Store(SHIM_MEM_ADDR(context_ptr));

1
third_party/libav vendored Submodule

@ -0,0 +1 @@
Subproject commit 4752bdcdbaf61afc933e667a016bca4eb389ac21