More improvements to the XMA decoder (and included some forgotten files)
This commit is contained in:
parent
0f9cd8cfb3
commit
d8ed66c336
|
@ -0,0 +1,9 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
|
||||
|
||||
<!-- Shims -->
|
||||
<Type Name="xe::apu::XmaContext">
|
||||
<DisplayString>id={id_}, allocated={is_allocated_}, enabled={is_enabled_}</DisplayString>
|
||||
</Type>
|
||||
|
||||
</AutoVisualizer>
|
|
@ -215,6 +215,7 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
|
|||
|
||||
// Quick die if there's no data.
|
||||
if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
|
||||
XELOGAPU("Context %d: No valid input buffers!", id());
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -224,25 +225,36 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
|
|||
return;
|
||||
}
|
||||
|
||||
assert_zero(data->unk_dword_9);
|
||||
|
||||
// XAudio Loops
|
||||
// loop_count:
|
||||
// - XAUDIO2_MAX_LOOP_COUNT = 254
|
||||
// - XAUDIO2_LOOP_INFINITE = 255
|
||||
// loop_start/loop_end are bit offsets to a specific frame
|
||||
//assert_true(data->loop_count == 0);
|
||||
|
||||
// Translate pointers for future use.
|
||||
// Sometimes the game will use rolling input buffers. If they do, we cannot
|
||||
// assume they form a complete block! In addition, the buffers DO NOT have
|
||||
// to be sequential!
|
||||
// (bit.trip runner 2 does this)
|
||||
// TODO: Collect partial frames into a buffer if the game uses rolling buffers,
|
||||
// and present the full frame to libav when we get it.
|
||||
uint8_t* in0 = data->input_buffer_0_valid
|
||||
? memory()->TranslatePhysical(data->input_buffer_0_ptr)
|
||||
: nullptr;
|
||||
uint8_t* in1 = data->input_buffer_1_valid
|
||||
? memory()->TranslatePhysical(data->input_buffer_1_ptr)
|
||||
: nullptr;
|
||||
uint8_t* current_input_buffer = in0;
|
||||
|
||||
size_t input_buffer_0_size =
|
||||
data->input_buffer_0_packet_count * kBytesPerPacket;
|
||||
size_t input_buffer_1_size =
|
||||
data->input_buffer_1_packet_count * kBytesPerPacket;
|
||||
size_t current_input_size =
|
||||
data->current_buffer ? input_buffer_1_size : input_buffer_0_size;
|
||||
size_t input_total_size = input_buffer_0_size + input_buffer_1_size;
|
||||
|
||||
// Output buffers are in raw PCM samples, 256 bytes per block.
|
||||
// Output buffer is a ring buffer. We need to write from the write offset
|
||||
|
@ -289,50 +301,111 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
|
|||
continue;
|
||||
}
|
||||
|
||||
int block_last_frame = 0; // last frame in block?
|
||||
int got_frame = 0; // successfully decoded a frame?
|
||||
int invalid_frame = 0; // invalid frame?
|
||||
int got_frame = 0; // successfully decoded a frame?
|
||||
int frame_size = 0;
|
||||
packet_->data = in0;
|
||||
packet_->size = data->input_buffer_0_packet_count * 2048;
|
||||
PrepareDecoder(in0, data->input_buffer_0_packet_count * 2048,
|
||||
data->sample_rate, num_channels);
|
||||
packet_->data = current_input_buffer;
|
||||
packet_->size = (int)current_input_size;
|
||||
PrepareDecoder(in0, current_input_size, data->sample_rate, num_channels);
|
||||
int len = xma2_decode_frame(context_, packet_, decoded_frame_, &got_frame,
|
||||
&block_last_frame, &frame_size,
|
||||
&invalid_frame, &frame_size, 1,
|
||||
data->input_buffer_read_offset);
|
||||
if (block_last_frame) {
|
||||
data->input_buffer_0_valid = 0;
|
||||
data->input_buffer_1_valid = 0;
|
||||
data->output_buffer_valid = 0;
|
||||
continue;
|
||||
if (invalid_frame) {
|
||||
// Invalid frame/packet: length header is 0x7FFF
|
||||
// Sometimes there are frames in the middle of the stream flagged as
|
||||
// invalid.
|
||||
// Double-check to make sure we're not in the middle.
|
||||
uint32_t frame_byte_offset = data->input_buffer_read_offset >> 3;
|
||||
uint32_t packet_number = frame_byte_offset / 2048;
|
||||
if (packet_number < data->input_buffer_0_packet_count - 1) {
|
||||
// Okay. Skip to the beginning of the next packet.
|
||||
packet_number++;
|
||||
data->input_buffer_read_offset = (packet_number * 2048 * 8) + 32;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Last frame of the block. Swap buffers if necessary.
|
||||
if (data->current_buffer == 0) {
|
||||
if (data->input_buffer_1_valid) {
|
||||
data->current_buffer++;
|
||||
} else {
|
||||
// End of input.
|
||||
data->input_buffer_read_offset = input_total_size * 8;
|
||||
}
|
||||
|
||||
data->input_buffer_0_valid = 0;
|
||||
return;
|
||||
} else {
|
||||
// End of input.
|
||||
data->current_buffer = 0;
|
||||
data->input_buffer_1_valid = 0;
|
||||
data->input_buffer_read_offset = input_total_size * 8;
|
||||
return;
|
||||
}
|
||||
} else if (got_frame && len > 0) {
|
||||
// Valid frame.
|
||||
// Check and see if we need to loop back to any spot.
|
||||
if (data->loop_count > 0 &&
|
||||
data->input_buffer_read_offset == data->loop_end) {
|
||||
// Loop back to the beginning.
|
||||
data->input_buffer_read_offset = data->loop_start;
|
||||
if (data->loop_count < 255) {
|
||||
data->loop_count--;
|
||||
}
|
||||
} else {
|
||||
data->input_buffer_read_offset += len;
|
||||
if (data->current_buffer == 0 &&
|
||||
data->input_buffer_read_offset > input_buffer_0_size * 8) {
|
||||
// Overflow? Setup next buffer.
|
||||
data->current_buffer++;
|
||||
data->input_buffer_0_valid = 0;
|
||||
} else if (data->input_buffer_read_offset > input_total_size * 8) {
|
||||
// Overflow! The game will fix up the read offset.
|
||||
data->current_buffer = 0;
|
||||
data->input_buffer_0_valid = 0;
|
||||
data->input_buffer_1_valid = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (len == AVERROR_EOF) {
|
||||
// Screw this gtfo
|
||||
data->input_buffer_0_valid = 0;
|
||||
data->input_buffer_1_valid = 0;
|
||||
data->output_buffer_valid = 0;
|
||||
if ((len < 0 || !got_frame) && frame_size != 0) {
|
||||
// Oh no! Skip the frame and hope everything works.
|
||||
data->input_buffer_read_offset += frame_size;
|
||||
data->input_buffer_read_offset = (uint32_t)xma2_correct_frame_offset(
|
||||
in0, input_buffer_0_size, data->input_buffer_read_offset);
|
||||
|
||||
continue;
|
||||
} else if (len < 0 || !got_frame) {
|
||||
// Oh no! Skip the frame and hope everything works.
|
||||
data->input_buffer_read_offset += frame_size;
|
||||
|
||||
continue;
|
||||
// Did not get frame and could not get frame size.
|
||||
data->input_buffer_0_valid = 0;
|
||||
data->input_buffer_1_valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
XELOGD("LEN: %d (%x)", len, len);
|
||||
|
||||
data->input_buffer_read_offset += len;
|
||||
// Sometimes we may run up to <15 bits before the next packet. If this
|
||||
// happens, we need to automatically advance to the next frame.
|
||||
// We'll ask the XMA2 decoder to do this for us, since it's more qualified.
|
||||
data->input_buffer_read_offset = (uint32_t)xma2_correct_frame_offset(
|
||||
in0, input_buffer_0_size, data->input_buffer_read_offset);
|
||||
last_input_read_pos_ = data->input_buffer_read_offset;
|
||||
|
||||
if (data->input_buffer_read_offset == 0) {
|
||||
// Invalid offset. Out of data.
|
||||
data->input_buffer_0_valid = 0;
|
||||
data->input_buffer_1_valid = 0;
|
||||
}
|
||||
|
||||
// Copy to the output buffer.
|
||||
// Successfully decoded a frame.
|
||||
size_t written_bytes = 0;
|
||||
if (got_frame) {
|
||||
#ifdef DEBUG
|
||||
// Validity checks.
|
||||
if (decoded_frame_->nb_samples > kSamplesPerFrame) {
|
||||
XELOGAPU("Decoded frame has an invalid sample count!");
|
||||
return;
|
||||
} else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) {
|
||||
XELOGAPU("libav decoder did not output floating point samples!");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -343,27 +416,11 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
|
|||
context_->channels * decoded_frame_->nb_samples * sizeof(float)) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Loop through every sample, convert and drop it into the output array.
|
||||
// If more than one channel, the game wants the samples from each channel
|
||||
// interleaved next to each other.
|
||||
uint32_t o = 0;
|
||||
for (int i = 0; i < decoded_frame_->nb_samples; i++) {
|
||||
for (int j = 0; j < context_->channels; j++) {
|
||||
// Select the appropriate array based on the current channel.
|
||||
auto sample_array = reinterpret_cast<float*>(decoded_frame_->data[j]);
|
||||
|
||||
// Raw sample should be within [-1, 1].
|
||||
// Clamp it, just in case.
|
||||
float raw_sample = xe::saturate(sample_array[i]);
|
||||
|
||||
// Convert the sample and output it in big endian.
|
||||
float scaled_sample = raw_sample * ((1 << 15) - 1);
|
||||
int sample = static_cast<int>(scaled_sample);
|
||||
xe::store_and_swap<uint16_t>(¤t_frame_[o++ * 2],
|
||||
sample & 0xFFFF);
|
||||
}
|
||||
}
|
||||
// Convert the frame.
|
||||
ConvertFrame((const float**)decoded_frame_->data, context_->channels,
|
||||
decoded_frame_->nb_samples, current_frame_);
|
||||
current_frame_pos_ = 0;
|
||||
|
||||
if (output_remaining_bytes < kBytesPerFrame * num_channels) {
|
||||
|
@ -385,6 +442,7 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
|
|||
}
|
||||
|
||||
// The game will kick us again with a new output buffer later.
|
||||
// It's important that we only invalidate this if we actually wrote to it!!
|
||||
data->output_buffer_valid = 0;
|
||||
}
|
||||
|
||||
|
@ -405,8 +463,8 @@ uint32_t XmaContext::GetFramePacketNumber(uint8_t* block, size_t size,
|
|||
|
||||
int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
|
||||
int channels) {
|
||||
// Sanity check: Packet metadata is always 1 for XMA2
|
||||
assert_true((block[2] & 0x7) == 1);
|
||||
// Sanity check: Packet metadata is always 1 for XMA2/0 for XMA
|
||||
assert_true((block[2] & 0x7) == 1 || (block[2] & 0x7) == 0);
|
||||
|
||||
sample_rate = GetSampleRate(sample_rate);
|
||||
|
||||
|
@ -432,6 +490,32 @@ int XmaContext::PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
|
|||
return 0;
|
||||
}
|
||||
|
||||
bool XmaContext::ConvertFrame(const float** samples, int num_channels,
|
||||
int num_samples, uint8_t* output_buffer) {
|
||||
// Loop through every sample, convert and drop it into the output array.
|
||||
// If more than one channel, we need to interleave the samples from each
|
||||
// channel next to each other.
|
||||
// TODO: This can definitely be optimized with AVX/SSE intrinsics!
|
||||
uint32_t o = 0;
|
||||
for (int i = 0; i < num_samples; i++) {
|
||||
for (int j = 0; j < num_channels; j++) {
|
||||
// Select the appropriate array based on the current channel.
|
||||
auto sample_array = samples[j];
|
||||
|
||||
// Raw sample should be within [-1, 1].
|
||||
// Clamp it, just in case.
|
||||
float raw_sample = xe::saturate(sample_array[i]);
|
||||
|
||||
// Convert the sample and output it in big endian.
|
||||
float scaled_sample = raw_sample * ((1 << 15) - 1);
|
||||
int sample = static_cast<int>(scaled_sample);
|
||||
xe::store_and_swap<uint16_t>(&output_buffer[o++ * 2], sample & 0xFFFF);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int XmaContext::StartPacket(XMA_CONTEXT_DATA* data) {
|
||||
// Translate pointers for future use.
|
||||
uint8_t* in0 = data->input_buffer_0_valid
|
||||
|
|
|
@ -60,8 +60,8 @@ struct XMA_CONTEXT_DATA {
|
|||
uint32_t input_buffer_1_packet_count : 12; // XMASetInputBuffer1, number of
|
||||
// 2KB packets. Max 4095 packets.
|
||||
// These packets form a block.
|
||||
uint32_t loop_subframe_start : 2; // +12bit, XMASetLoopData
|
||||
uint32_t loop_subframe_end : 3; // +14bit, XMASetLoopData
|
||||
uint32_t loop_subframe_start : 2; // +12bit, XMASetLoopData
|
||||
uint32_t loop_subframe_end : 3; // +14bit, XMASetLoopData
|
||||
uint32_t loop_subframe_skip : 3; // +17bit, XMASetLoopData might be
|
||||
// subframe_decode_count
|
||||
uint32_t subframe_decode_count : 4; // +20bit might be subframe_skip_count
|
||||
|
@ -77,10 +77,12 @@ struct XMA_CONTEXT_DATA {
|
|||
|
||||
// DWORD 3
|
||||
uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset
|
||||
// frame offset in bits
|
||||
uint32_t unk_dword_3 : 6; // ? ParserErrorStatus/ParserErrorSet(?)
|
||||
|
||||
// DWORD 4
|
||||
uint32_t loop_end : 26; // XMASetLoopData LoopEndOffset
|
||||
// frame offset in bits
|
||||
uint32_t packet_metadata : 5; // XMAGetPacketMetadata
|
||||
uint32_t current_buffer : 1; // ?
|
||||
|
||||
|
@ -170,6 +172,9 @@ class XmaContext {
|
|||
int PrepareDecoder(uint8_t* block, size_t size, int sample_rate,
|
||||
int channels);
|
||||
|
||||
bool ConvertFrame(const float** samples, int num_channels, int num_samples,
|
||||
uint8_t* output_buffer);
|
||||
|
||||
int StartPacket(XMA_CONTEXT_DATA* data);
|
||||
|
||||
int PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
|
||||
|
|
|
@ -55,7 +55,34 @@ XmaDecoder::XmaDecoder(cpu::Processor* processor)
|
|||
XmaDecoder::~XmaDecoder() = default;
|
||||
|
||||
void av_log_callback(void* avcl, int level, const char* fmt, va_list va) {
|
||||
xe::LogLineVarargs('A', fmt, va);
|
||||
#ifdef NDEBUG
|
||||
if (level > AV_LOG_WARNING) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
char level_char = '?';
|
||||
switch (level) {
|
||||
case AV_LOG_ERROR:
|
||||
level_char = '!';
|
||||
break;
|
||||
case AV_LOG_WARNING:
|
||||
level_char = 'w';
|
||||
break;
|
||||
case AV_LOG_INFO:
|
||||
level_char = 'i';
|
||||
break;
|
||||
case AV_LOG_VERBOSE:
|
||||
level_char = 'v';
|
||||
break;
|
||||
case AV_LOG_DEBUG:
|
||||
level_char = 'd';
|
||||
break;
|
||||
}
|
||||
|
||||
StringBuffer buff;
|
||||
buff.AppendVarargs(fmt, va);
|
||||
xe::LogLineVarargs(level_char, "libav: %s", buff.GetString());
|
||||
}
|
||||
|
||||
X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
|
||||
|
@ -86,7 +113,7 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
|
|||
}
|
||||
registers_.next_context = 1;
|
||||
|
||||
worker_running_ = true;
|
||||
//worker_running_ = true;
|
||||
worker_thread_ = kernel::object_ref<kernel::XHostThread>(
|
||||
new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() {
|
||||
WorkerThreadMain();
|
||||
|
@ -213,6 +240,7 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) {
|
|||
uint32_t context_id = base_context_id + i;
|
||||
XmaContext& context = contexts_[context_id];
|
||||
context.Enable();
|
||||
context.Work();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
// This file contains some functions used to help parse XMA data.
|
||||
|
||||
#ifndef XENIA_APU_XMA_HELPERS_H_
|
||||
#define XENIA_APU_XMA_HELPERS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace xe {
|
||||
namespace apu {
|
||||
namespace xma {
|
||||
|
||||
// Get number of frames that /begin/ in this packet.
// The count is read from the upper 6 bits of the first packet byte.
// |packet| must point to at least 1 readable byte.
// Declared inline because the definition lives in a header: a non-inline
// definition would be emitted by every translation unit including this file.
inline uint32_t GetPacketFrameCount(uint8_t* packet) {
  return static_cast<uint32_t>(packet[0] >> 2);
}
|
||||
|
||||
// Get the first frame offset in bits.
// The header field is 15 bits wide: the low 2 bits of packet[0] (its upper
// 6 bits are the frame count), all of packet[1], and the upper 5 bits of
// packet[2] (its low 3 bits are the packet metadata).
// packet[0] is masked so that the lowest frame-count bit cannot leak into
// bit 15 of the result.
// 32 is added to the stored value - presumably to skip the 4-byte packet
// header, so the result is relative to the packet start; confirm with callers.
// |packet| must point to at least 3 readable bytes.
// Declared inline because the definition lives in a header.
inline uint32_t GetPacketFrameOffset(uint8_t* packet) {
  const uint32_t stored_offset =
      (static_cast<uint32_t>(packet[0] & 0x3) << 13) |
      (static_cast<uint32_t>(packet[1]) << 5) |
      (static_cast<uint32_t>(packet[2]) >> 3);
  return stored_offset + 32;
}
|
||||
|
||||
// Get the packet metadata field: the low 3 bits of the third packet byte.
// |packet| must point to at least 3 readable bytes.
// Declared inline because the definition lives in a header: a non-inline
// definition would be emitted by every translation unit including this file.
inline uint32_t GetPacketMetadata(uint8_t* packet) {
  return static_cast<uint32_t>(packet[2] & 0x7);
}
|
||||
|
||||
// Get the packet skip count: the fourth packet byte, returned unmodified.
// |packet| must point to at least 4 readable bytes.
// Declared inline because the definition lives in a header: a non-inline
// definition would be emitted by every translation unit including this file.
inline uint32_t GetPacketSkipCount(uint8_t* packet) {
  return static_cast<uint32_t>(packet[3]);
}
|
||||
|
||||
} // namespace xma
|
||||
} // namespace apu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_APU_XMA_HELPERS_H_
|
|
@ -1 +1 @@
|
|||
Subproject commit 4752bdcdbaf61afc933e667a016bca4eb389ac21
|
||||
Subproject commit 8be22f03d7e3c1663a66cc09375f840a7fc9a365
|
Loading…
Reference in New Issue