New WIP audio decoder
This commit is contained in:
parent
f2b2a22687
commit
0f9cd8cfb3
|
@ -19,3 +19,6 @@
|
|||
[submodule "build_tools"]
|
||||
path = build_tools
|
||||
url = https://github.com/xenia-project/build-tools.git
|
||||
[submodule "third_party/libav"]
|
||||
path = third_party/libav
|
||||
url = https://github.com/xenia-project/libav.git
|
||||
|
|
|
@ -13,13 +13,17 @@
|
|||
#include <cstring>
|
||||
|
||||
#include "xenia/apu/xma_decoder.h"
|
||||
#include "xenia/apu/xma_helpers.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/ring_buffer.h"
|
||||
#include "xenia/profiling.h"
|
||||
|
||||
extern "C" {
|
||||
#include "libavcodec/avcodec.h"
|
||||
#include "libavcodec/xma2dec.h"
|
||||
#include "libavutil/channel_layout.h"
|
||||
|
||||
extern AVCodec ff_xma2_decoder;
|
||||
} // extern "C"
|
||||
|
||||
// Credits for most of this code goes to:
|
||||
|
@ -50,14 +54,8 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
|
|||
memory_ = memory;
|
||||
guest_ptr_ = guest_ptr;
|
||||
|
||||
static bool avcodec_initialized = false;
|
||||
if (!avcodec_initialized) {
|
||||
avcodec_register_all();
|
||||
avcodec_initialized = true;
|
||||
}
|
||||
|
||||
// Allocate important stuff.
|
||||
codec_ = avcodec_find_decoder(AV_CODEC_ID_WMAPRO);
|
||||
codec_ = &ff_xma2_decoder;
|
||||
if (!codec_) {
|
||||
return 1;
|
||||
}
|
||||
|
@ -91,7 +89,7 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
|
|||
|
||||
// Current frame stuff whatever
|
||||
// samples per frame * 2 max channels * output bytes
|
||||
current_frame_ = new uint8_t[kSamplesPerFrame * 2 * 2];
|
||||
current_frame_ = new uint8_t[kSamplesPerFrame * kBytesPerSample * 2];
|
||||
current_frame_pos_ = 0;
|
||||
frame_samples_size_ = 0;
|
||||
|
||||
|
@ -119,11 +117,10 @@ void XmaContext::Enable() {
|
|||
auto context_ptr = memory()->TranslateVirtual(guest_ptr());
|
||||
XMA_CONTEXT_DATA data(context_ptr);
|
||||
|
||||
XELOGAPU(
|
||||
"XmaContext: kicking context %d (%d/%d bytes)", id(),
|
||||
(data.input_buffer_read_offset & ~0x7FF) / 8,
|
||||
(data.input_buffer_0_packet_count + data.input_buffer_1_packet_count) *
|
||||
kBytesPerPacket);
|
||||
XELOGAPU("XmaContext: kicking context %d (%d/%d bits)", id(),
|
||||
data.input_buffer_read_offset, (data.input_buffer_0_packet_count +
|
||||
data.input_buffer_1_packet_count) *
|
||||
kBytesPerPacket * 8);
|
||||
|
||||
data.Store(context_ptr);
|
||||
|
||||
|
@ -227,6 +224,26 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
|
|||
return;
|
||||
}
|
||||
|
||||
// XAudio Loops
|
||||
// loop_count:
|
||||
// - XAUDIO2_MAX_LOOP_COUNT = 254
|
||||
// - XAUDIO2_LOOP_INFINITE = 255
|
||||
// loop_start/loop_end are bit offsets to a specific frame
|
||||
//assert_true(data->loop_count == 0);
|
||||
|
||||
// Translate pointers for future use.
|
||||
uint8_t* in0 = data->input_buffer_0_valid
|
||||
? memory()->TranslatePhysical(data->input_buffer_0_ptr)
|
||||
: nullptr;
|
||||
uint8_t* in1 = data->input_buffer_1_valid
|
||||
? memory()->TranslatePhysical(data->input_buffer_1_ptr)
|
||||
: nullptr;
|
||||
|
||||
size_t input_buffer_0_size =
|
||||
data->input_buffer_0_packet_count * kBytesPerPacket;
|
||||
size_t input_buffer_1_size =
|
||||
data->input_buffer_1_packet_count * kBytesPerPacket;
|
||||
|
||||
// Output buffers are in raw PCM samples, 256 bytes per block.
|
||||
// Output buffer is a ring buffer. We need to write from the write offset
|
||||
// to the read offset.
|
||||
|
@ -246,76 +263,175 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
|
|||
|
||||
// Decode until we can't write any more data.
|
||||
while (output_remaining_bytes > 0) {
|
||||
// This'll copy audio samples into the output buffer.
|
||||
// The samples need to be 2 bytes long!
|
||||
// Copies one frame at a time, so keep calling this until size == 0.
|
||||
int written_bytes = 0;
|
||||
int decode_attempts_remaining = 3;
|
||||
|
||||
uint8_t work_buffer[kOutputMaxSizeBytes];
|
||||
while (decode_attempts_remaining) {
|
||||
size_t read_bytes = 0;
|
||||
written_bytes =
|
||||
DecodePacket(work_buffer, 0, output_remaining_bytes, &read_bytes);
|
||||
if (written_bytes >= 0) {
|
||||
// assert_true((written_bytes % 256) == 0);
|
||||
auto written_bytes_rb = output_rb.Write(work_buffer, written_bytes);
|
||||
assert_true(written_bytes == written_bytes_rb);
|
||||
|
||||
// Ok.
|
||||
break;
|
||||
} else if (read_bytes % 2048 == 0) {
|
||||
// Sometimes the decoder will fail on a packet. I think it's
|
||||
// looking for cross-packet frames and failing. If you run it again
|
||||
// on the same packet it'll work though.
|
||||
--decode_attempts_remaining;
|
||||
} else {
|
||||
// Failed in the middle of a packet, do not retry!
|
||||
decode_attempts_remaining = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!decode_attempts_remaining) {
|
||||
XELOGAPU("XmaContext: libav failed to decode packet (returned %.8X)",
|
||||
-written_bytes);
|
||||
|
||||
// Failed out.
|
||||
if (data->input_buffer_0_valid || data->input_buffer_1_valid) {
|
||||
// There's new data available - maybe we'll be ok if we decode it?
|
||||
written_bytes = 0;
|
||||
DiscardPacket();
|
||||
} else {
|
||||
// No data and hosed - bail.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
data->output_buffer_write_offset = output_rb.write_offset() / 256;
|
||||
output_remaining_bytes -= written_bytes;
|
||||
|
||||
// If we need more data and the input buffers have it, grab it.
|
||||
if (written_bytes) {
|
||||
// Haven't finished with current packet.
|
||||
continue;
|
||||
} else if (data->input_buffer_0_valid || data->input_buffer_1_valid) {
|
||||
// Done with previous packet, so grab a new one.
|
||||
int ret = StartPacket(data);
|
||||
if (ret <= 0) {
|
||||
// No more data (but may have prepared a packet)
|
||||
data->input_buffer_0_valid = 0;
|
||||
data->input_buffer_1_valid = 0;
|
||||
}
|
||||
} else {
|
||||
// Decoder is out of data and there's no more to give.
|
||||
if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
|
||||
// Out of data.
|
||||
break;
|
||||
}
|
||||
|
||||
int num_channels = data->is_stereo ? 2 : 1;
|
||||
|
||||
// Check if we have part of a frame waiting (and the game hasn't jumped
|
||||
// around)
|
||||
if (current_frame_pos_ &&
|
||||
last_input_read_pos_ == data->input_buffer_read_offset) {
|
||||
size_t to_write = std::min(
|
||||
output_remaining_bytes,
|
||||
((size_t)kBytesPerFrame * num_channels - current_frame_pos_));
|
||||
output_rb.Write(current_frame_, to_write);
|
||||
|
||||
current_frame_pos_ += to_write;
|
||||
if (current_frame_pos_ >= kBytesPerFrame * num_channels) {
|
||||
current_frame_pos_ = 0;
|
||||
}
|
||||
|
||||
data->output_buffer_write_offset = output_rb.write_offset() / 256;
|
||||
output_remaining_bytes -= to_write;
|
||||
continue;
|
||||
}
|
||||
|
||||
int block_last_frame = 0; // last frame in block?
|
||||
int got_frame = 0; // successfully decoded a frame?
|
||||
int frame_size = 0;
|
||||
packet_->data = in0;
|
||||
packet_->size = data->input_buffer_0_packet_count * 2048;
|
||||
PrepareDecoder(in0, data->input_buffer_0_packet_count * 2048,
|
||||
data->sample_rate, num_channels);
|
||||
int len = xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame,
|
||||
&block_last_frame, &frame_size,
|
||||
data->input_buffer_read_offset);
|
||||
if (block_last_frame) {
|
||||
data->input_buffer_0_valid = 0;
|
||||
data->input_buffer_1_valid = 0;
|
||||
data->output_buffer_valid = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (len == AVERROR_EOF) {
|
||||
// Screw this gtfo
|
||||
data->input_buffer_0_valid = 0;
|
||||
data->input_buffer_1_valid = 0;
|
||||
data->output_buffer_valid = 0;
|
||||
|
||||
continue;
|
||||
} else if (len < 0 || !got_frame) {
|
||||
// Oh no! Skip the frame and hope everything works.
|
||||
data->input_buffer_read_offset += frame_size;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
XELOGD("LEN: %d (%x)", len, len);
|
||||
|
||||
data->input_buffer_read_offset += len;
|
||||
last_input_read_pos_ = data->input_buffer_read_offset;
|
||||
|
||||
// Copy to the output buffer.
|
||||
// Successfully decoded a frame.
|
||||
size_t written_bytes = 0;
|
||||
if (got_frame) {
|
||||
// Validity checks.
|
||||
if (decoded_frame_->nb_samples > kSamplesPerFrame) {
|
||||
return;
|
||||
} else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Check the returned buffer size.
|
||||
if (av_samples_get_buffer_size(NULL, context_->channels,
|
||||
decoded_frame_->nb_samples,
|
||||
context_->sample_fmt, 1) !=
|
||||
context_->channels * decoded_frame_->nb_samples * sizeof(float)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Loop through every sample, convert and drop it into the output array.
|
||||
// If more than one channel, the game wants the samples from each channel
|
||||
// interleaved next to each other.
|
||||
uint32_t o = 0;
|
||||
for (int i = 0; i < decoded_frame_->nb_samples; i++) {
|
||||
for (int j = 0; j < context_->channels; j++) {
|
||||
// Select the appropriate array based on the current channel.
|
||||
auto sample_array = reinterpret_cast<float*>(decoded_frame_->data[j]);
|
||||
|
||||
// Raw sample should be within [-1, 1].
|
||||
// Clamp it, just in case.
|
||||
float raw_sample = xe::saturate(sample_array[i]);
|
||||
|
||||
// Convert the sample and output it in big endian.
|
||||
float scaled_sample = raw_sample * ((1 << 15) - 1);
|
||||
int sample = static_cast<int>(scaled_sample);
|
||||
xe::store_and_swap<uint16_t>(&current_frame_[o++ * 2],
|
||||
sample & 0xFFFF);
|
||||
}
|
||||
}
|
||||
current_frame_pos_ = 0;
|
||||
|
||||
if (output_remaining_bytes < kBytesPerFrame * num_channels) {
|
||||
// Output buffer isn't big enough to store the entire frame! Write out a
|
||||
// part of it.
|
||||
current_frame_pos_ = output_remaining_bytes;
|
||||
output_rb.Write(current_frame_, output_remaining_bytes);
|
||||
|
||||
written_bytes = output_remaining_bytes;
|
||||
} else {
|
||||
output_rb.Write(current_frame_, kBytesPerFrame * num_channels);
|
||||
|
||||
written_bytes = kBytesPerFrame * num_channels;
|
||||
}
|
||||
}
|
||||
|
||||
output_remaining_bytes -= written_bytes;
|
||||
data->output_buffer_write_offset = output_rb.write_offset() / 256;
|
||||
}
|
||||
|
||||
// The game will kick us again with a new output buffer later.
|
||||
data->output_buffer_valid = 0;
|
||||
}
|
||||
|
||||
uint32_t XmaContext::GetFramePacketNumber(uint8_t* block, size_t size,
|
||||
size_t bit_offset) {
|
||||
size *= 8;
|
||||
if (bit_offset >= size) {
|
||||
// Not good :(
|
||||
assert_always();
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t byte_offset = bit_offset >> 3;
|
||||
size_t packet_number = byte_offset / kBytesPerPacket;
|
||||
|
||||
return (uint32_t)packet_number;
|
||||
}
|
||||
|
||||
int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
|
||||
int channels) {
|
||||
// Sanity check: Packet metadata is always 1 for XMA2
|
||||
assert_true((block[2] & 0x7) == 1);
|
||||
|
||||
sample_rate = GetSampleRate(sample_rate);
|
||||
|
||||
// Re-initialize the context with new sample rate and channels.
|
||||
if (context_->sample_rate != sample_rate || context_->channels != channels) {
|
||||
// We have to reopen the codec so it'll realloc whatever data it needs.
|
||||
// TODO(DrChat): Find a better way.
|
||||
avcodec_close(context_);
|
||||
|
||||
context_->sample_rate = sample_rate;
|
||||
context_->channels = channels;
|
||||
extra_data_.channel_mask =
|
||||
channels == 2 ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
|
||||
|
||||
if (avcodec_open2(context_, codec_, NULL) < 0) {
|
||||
XELOGE("XmaContext: Failed to reopen libav context");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
av_frame_unref(decoded_frame_);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
|
||||
// Translate pointers for future use.
|
||||
uint8_t* in0 = data->input_buffer_0_valid
|
||||
|
@ -340,41 +456,49 @@ int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
|
|||
// Total input size
|
||||
uint32_t input_size_bytes = input_size_0_bytes + input_size_1_bytes;
|
||||
|
||||
// Calculate the first frame offset we need to decode.
|
||||
uint32_t frame_offset_bits = (data->input_buffer_read_offset % (2048 * 8));
|
||||
|
||||
// Input read offset is in bits. Typically starts at 32 (4 bytes).
|
||||
// "Sequence" offset - used internally for WMA Pro decoder.
|
||||
// Just the read offset.
|
||||
uint32_t seq_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8;
|
||||
uint32_t input_remaining_bytes = input_size_bytes - seq_offset_bytes;
|
||||
// NOTE: Read offset may not be at the first frame in a packet!
|
||||
uint32_t packet_offset_bytes = (data->input_buffer_read_offset & ~0x7FF) / 8;
|
||||
if (packet_offset_bytes % 2048 != 0) {
|
||||
packet_offset_bytes -= packet_offset_bytes % 2048;
|
||||
}
|
||||
uint32_t input_remaining_bytes = input_size_bytes - packet_offset_bytes;
|
||||
|
||||
if (seq_offset_bytes < input_size_bytes) {
|
||||
// Setup input offset and input buffer.
|
||||
uint32_t input_offset_bytes = seq_offset_bytes;
|
||||
auto input_buffer = in0;
|
||||
|
||||
if (seq_offset_bytes >= input_size_0_bytes && input_size_1_bytes) {
|
||||
// Size overlap, select input buffer 1.
|
||||
// TODO(DrChat): This needs testing.
|
||||
input_offset_bytes -= input_size_0_bytes;
|
||||
input_buffer = in1;
|
||||
}
|
||||
|
||||
// Still have data to read.
|
||||
auto packet = input_buffer + input_offset_bytes;
|
||||
assert_true(input_offset_bytes % 2048 == 0);
|
||||
PreparePacket(packet, seq_offset_bytes, kBytesPerPacket, sample_rate,
|
||||
channels);
|
||||
data->input_buffer_read_offset += kBytesPerPacket * 8;
|
||||
|
||||
input_remaining_bytes -= kBytesPerPacket;
|
||||
if (input_remaining_bytes <= 0) {
|
||||
// Used the last of the data but prepared a packet.
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
if (packet_offset_bytes >= input_size_bytes) {
|
||||
// No more data available and no packet prepared.
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Setup input offset and input buffer.
|
||||
uint32_t input_offset_bytes = packet_offset_bytes;
|
||||
auto input_buffer = in0;
|
||||
|
||||
if (packet_offset_bytes >= input_size_0_bytes && input_size_1_bytes) {
|
||||
// Size overlap, select input buffer 1.
|
||||
// TODO(DrChat): This needs testing.
|
||||
input_offset_bytes -= input_size_0_bytes;
|
||||
input_buffer = in1;
|
||||
}
|
||||
|
||||
// Still have data to read.
|
||||
auto packet = input_buffer + input_offset_bytes;
|
||||
assert_true(input_offset_bytes % 2048 == 0);
|
||||
PreparePacket(packet, packet_offset_bytes, kBytesPerPacket, sample_rate,
|
||||
channels);
|
||||
|
||||
data->input_buffer_read_offset += kBytesPerPacket * 8;
|
||||
|
||||
input_remaining_bytes -= kBytesPerPacket;
|
||||
if (input_remaining_bytes <= 0) {
|
||||
// Used the last of the data but prepared a packet.
|
||||
return 0;
|
||||
}
|
||||
|
||||
return input_remaining_bytes;
|
||||
}
|
||||
|
||||
|
@ -390,15 +514,11 @@ int XmaContext::PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
|
|||
return 1;
|
||||
}
|
||||
|
||||
std::memcpy(packet_data_, input, size);
|
||||
// Packet metadata is always 1 for XMA2
|
||||
assert_true((input[2] & 0x7) == 1);
|
||||
|
||||
// Modify the packet header so it's WMAPro compatible.
|
||||
auto int_packet_data = reinterpret_cast<int*>(packet_data_);
|
||||
*int_packet_data =
|
||||
(((seq_offset & 0x7800) | 0x400) >> 7) | (*int_packet_data & 0xFFFEFF08);
|
||||
|
||||
packet_->data = packet_data_;
|
||||
packet_->size = kBytesPerPacket;
|
||||
packet_->data = input;
|
||||
packet_->size = (int)size;
|
||||
|
||||
// Re-initialize the context with new sample rate and channels.
|
||||
if (context_->sample_rate != sample_rate || context_->channels != channels) {
|
||||
|
|
|
@ -60,8 +60,8 @@ struct XMA_CONTEXT_DATA {
|
|||
uint32_t input_buffer_1_packet_count : 12; // XMASetInputBuffer1, number of
|
||||
// 2KB packets. Max 4095 packets.
|
||||
// These packets form a block.
|
||||
uint32_t loop_subframe_end : 2; // +12bit, XMASetLoopData
|
||||
uint32_t unk_dword_1_a : 3; // ? might be loop_subframe_skip
|
||||
uint32_t loop_subframe_start : 2; // +12bit, XMASetLoopData
|
||||
uint32_t loop_subframe_end : 3; // +14bit, XMASetLoopData
|
||||
uint32_t loop_subframe_skip : 3; // +17bit, XMASetLoopData might be
|
||||
// subframe_decode_count
|
||||
uint32_t subframe_decode_count : 4; // +20bit might be subframe_skip_count
|
||||
|
@ -91,7 +91,7 @@ struct XMA_CONTEXT_DATA {
|
|||
// DWORD 7
|
||||
uint32_t output_buffer_ptr; // physical address
|
||||
// DWORD 8
|
||||
uint32_t overlap_add_ptr; // PtrOverlapAdd(?)
|
||||
uint32_t work_buffer_ptr; // PtrOverlapAdd(?)
|
||||
|
||||
// DWORD 9
|
||||
// +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead
|
||||
|
@ -133,6 +133,7 @@ class XmaContext {
|
|||
static const uint32_t kBytesPerSample = 2;
|
||||
static const uint32_t kSamplesPerFrame = 512;
|
||||
static const uint32_t kSamplesPerSubframe = 128;
|
||||
static const uint32_t kBytesPerFrame = kSamplesPerFrame * kBytesPerSample;
|
||||
static const uint32_t kBytesPerSubframe =
|
||||
kSamplesPerSubframe * kBytesPerSample;
|
||||
|
||||
|
@ -165,6 +166,10 @@ class XmaContext {
|
|||
static int GetSampleRate(int id);
|
||||
|
||||
void DecodePackets(XMA_CONTEXT_DATA* data);
|
||||
uint32_t GetFramePacketNumber(uint8_t* block, size_t size, size_t bit_offset);
|
||||
int PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
|
||||
int channels);
|
||||
|
||||
int StartPacket(XMA_CONTEXT_DATA* data);
|
||||
|
||||
int PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
|
||||
|
@ -189,11 +194,11 @@ class XmaContext {
|
|||
AVPacket* packet_ = nullptr;
|
||||
WmaProExtraData extra_data_;
|
||||
|
||||
// If we didn't finish writing a frame to the output buffer, this is the offset.
|
||||
size_t current_frame_pos_ = 0;
|
||||
uint32_t last_input_read_pos_ = 0; // Last seen read buffer pos
|
||||
uint8_t* current_frame_ = nullptr;
|
||||
uint32_t frame_samples_size_ = 0;
|
||||
|
||||
uint8_t packet_data_[kBytesPerPacket];
|
||||
};
|
||||
|
||||
} // namespace apu
|
||||
|
|
|
@ -171,7 +171,7 @@ SHIM_CALL XMASetLoopData_shim(PPCContext* ppc_context,
|
|||
context.loop_end = loop_data->loop_end;
|
||||
context.loop_count = loop_data->loop_count;
|
||||
context.loop_subframe_end = loop_data->loop_subframe_end;
|
||||
context.loop_subframe_skip = loop_data->loop_subframe_end;
|
||||
context.loop_subframe_skip = loop_data->loop_subframe_skip;
|
||||
|
||||
context.Store(SHIM_MEM_ADDR(context_ptr));
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 4752bdcdbaf61afc933e667a016bca4eb389ac21
|
Loading…
Reference in New Issue