Moved the XMA decoder out of AudioSystem and into its own world (plus minor code cleanup in the process).

This commit is contained in:
gibbed 2015-06-21 02:25:24 -05:00
parent 736dba5aca
commit 021b5a3d17
13 changed files with 858 additions and 732 deletions

View File

@ -16,7 +16,6 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\xenia\apu\apu.cc" />
<ClCompile Include="src\xenia\apu\audio_decoder.cc" />
<ClCompile Include="src\xenia\apu\audio_driver.cc" />
<ClCompile Include="src\xenia\apu\audio_system.cc" />
<ClCompile Include="src\xenia\apu\nop\nop_apu.cc" />
@ -24,6 +23,8 @@
<ClCompile Include="src\xenia\apu\xaudio2\xaudio2_apu.cc" />
<ClCompile Include="src\xenia\apu\xaudio2\xaudio2_audio_driver.cc" />
<ClCompile Include="src\xenia\apu\xaudio2\xaudio2_audio_system.cc" />
<ClCompile Include="src\xenia\apu\xma_context.cc" />
<ClCompile Include="src\xenia\apu\xma_decoder.cc" />
<ClCompile Include="src\xenia\base\arena.cc" />
<ClCompile Include="src\xenia\base\clock.cc" />
<ClCompile Include="src\xenia\base\debugging_win.cc" />
@ -266,7 +267,6 @@
<ItemGroup>
<ClInclude Include="src\xenia\apu\apu-private.h" />
<ClInclude Include="src\xenia\apu\apu.h" />
<ClInclude Include="src\xenia\apu\audio_decoder.h" />
<ClInclude Include="src\xenia\apu\audio_driver.h" />
<ClInclude Include="src\xenia\apu\audio_system.h" />
<ClInclude Include="src\xenia\apu\nop\nop_apu-private.h" />
@ -276,6 +276,8 @@
<ClInclude Include="src\xenia\apu\xaudio2\xaudio2_apu.h" />
<ClInclude Include="src\xenia\apu\xaudio2\xaudio2_audio_driver.h" />
<ClInclude Include="src\xenia\apu\xaudio2\xaudio2_audio_system.h" />
<ClInclude Include="src\xenia\apu\xma_context.h" />
<ClInclude Include="src\xenia\apu\xma_decoder.h" />
<ClInclude Include="src\xenia\base\arena.h" />
<ClInclude Include="src\xenia\base\assert.h" />
<ClInclude Include="src\xenia\base\atomic.h" />

View File

@ -709,9 +709,6 @@
<ClCompile Include="src\xenia\kernel\xboxkrnl_error.cc">
<Filter>src\xenia\kernel</Filter>
</ClCompile>
<ClCompile Include="src\xenia\apu\audio_decoder.cc">
<Filter>src\xenia\apu</Filter>
</ClCompile>
<ClCompile Include="src\xenia\cpu\backend\x64\x64_code_cache.cc">
<Filter>src\xenia\cpu\backend\x64</Filter>
</ClCompile>
@ -775,6 +772,12 @@
<ClCompile Include="src\xenia\cpu\compiler\passes\memory_sequence_combination_pass.cc">
<Filter>src\xenia\cpu\compiler\passes</Filter>
</ClCompile>
<ClCompile Include="src\xenia\apu\xma_decoder.cc">
<Filter>src\xenia\apu</Filter>
</ClCompile>
<ClCompile Include="src\xenia\apu\xma_context.cc">
<Filter>src\xenia\apu</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="src\xenia\emulator.h">
@ -1404,9 +1407,6 @@
<ClInclude Include="third_party\xbyak\xbyak\xbyak.h">
<Filter>third_party\xbyak\xbyak</Filter>
</ClInclude>
<ClInclude Include="src\xenia\apu\audio_decoder.h">
<Filter>src\xenia\apu</Filter>
</ClInclude>
<ClInclude Include="src\xenia\debug\proto\breakpoints_generated.h">
<Filter>src\xenia\debug\proto</Filter>
</ClInclude>
@ -1500,6 +1500,12 @@
<ClInclude Include="src\xenia\cpu\backend\code_cache.h">
<Filter>src\xenia\cpu\backend</Filter>
</ClInclude>
<ClInclude Include="src\xenia\apu\xma_decoder.h">
<Filter>src\xenia\apu</Filter>
</ClInclude>
<ClInclude Include="src\xenia\apu\xma_context.h">
<Filter>src\xenia\apu</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="src\xenia\cpu\backend\x64\x64_sequence.inl">

View File

@ -13,6 +13,7 @@
#include <memory>
#include "xenia/apu/audio_system.h"
#include "xenia/apu/xma_decoder.h"
namespace xe {
class Emulator;

View File

@ -1,65 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_APU_AUDIO_DECODER_H_
#define XENIA_APU_AUDIO_DECODER_H_
#include "xenia/apu/audio_system.h"
// XMA audio format:
// From research, XMA appears to be based on WMA Pro with
// a few (very slight) modifications.
// XMA2 is fully backwards-compatible with XMA1.
// Helpful resources:
// https://github.com/koolkdev/libertyv/blob/master/libav_wrapper/xma2dec.c
// http://hcs64.com/mboard/forum.php?showthread=14818
// https://github.com/hrydgard/minidx9/blob/master/Include/xma2defs.h
// Forward declarations
struct AVCodec;
struct AVCodecContext;
struct AVFrame;
struct AVPacket;
namespace xe {
namespace apu {
// Wraps the libav state needed to decode XMA packets. AudioSystem creates one
// instance per XMA context because decoder state has to persist across calls.
class AudioDecoder {
public:
AudioDecoder();
~AudioDecoder();
// Sets up the libav codec/context and the intermediate frame buffer.
// Returns 0 on success.
int Initialize();
// Stages one input packet for decoding. |size| must equal
// XMAContextData::kBytesPerPacket; |sample_rate| and |channels| describe the
// stream the packet belongs to. Returns 0 on success, non-zero on error.
int PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
int sample_rate, int channels);
// Drops the currently staged packet and any partially consumed frame.
void DiscardPacket();
// Decodes from the staged packet into |output|. A non-negative result is
// the number of bytes produced; a negative result signals a decode failure.
int DecodePacket(uint8_t* output, size_t offset, size_t size);
private:
// libav structures
AVCodec* codec_;
AVCodecContext* context_;
AVFrame* decoded_frame_;
AVPacket* packet_;
// Read position within current_frame_.
size_t current_frame_pos_;
// Buffer holding the most recently decoded frame's samples.
uint8_t* current_frame_;
// Amount of valid data in current_frame_.
// NOTE(review): units (bytes vs. samples) are not visible here - confirm.
uint32_t frame_samples_size_;
// Local copy of the packet handed to libav.
uint8_t packet_data_[XMAContextData::kBytesPerPacket];
};
} // namespace apu
} // namespace xe
#endif // XENIA_APU_AUDIO_DECODER_H_

View File

@ -10,7 +10,6 @@
#include "xenia/apu/audio_system.h"
#include "xenia/apu/audio_driver.h"
#include "xenia/apu/audio_decoder.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/ring_buffer.h"
@ -21,10 +20,6 @@
#include "xenia/kernel/objects/xthread.h"
#include "xenia/profiling.h"
extern "C" {
#include "libavutil/log.h"
}
// As with normal Microsoft, there are like twelve different ways to access
// the audio APIs. Early games use XMA*() methods almost exclusively to touch
// decoders. Later games use XAudio*() and direct memory writes to the XMA
@ -36,20 +31,6 @@ extern "C" {
// The XMA*() functions just manipulate the audio system in the guest context
// and let the normal AudioSystem handling take it, to prevent duplicate
// implementations. They can be found in xboxkrnl_audio_xma.cc
//
// XMA details:
// https://devel.nuclex.org/external/svn/directx/trunk/include/xma2defs.h
// https://github.com/gdawg/fsbext/blob/master/src/xma_header.h
//
// XAudio2 uses XMA under the covers, and seems to map with the same
// restrictions of frame/subframe/etc:
// https://msdn.microsoft.com/en-us/library/windows/desktop/microsoft.directx_sdk.xaudio2.xaudio2_buffer(v=vs.85).aspx
//
// XMA contexts are 64 bytes in size and made up of tightly packed bitfields.
// They live in physical memory that is not usually available to games. Games
// will use MmMapIoSpace to map the 64-byte context into user memory so they
// can party on it. If the game doesn't do this, it's likely they are either
// passing the context to XAudio or using the XMA* functions.
namespace xe {
namespace apu {
@ -59,8 +40,7 @@ using namespace xe::cpu;
AudioSystem::AudioSystem(Emulator* emulator)
: emulator_(emulator),
memory_(emulator->memory()),
worker_running_(false),
decoder_running_(false) {
worker_running_(false) {
std::memset(clients_, 0, sizeof(clients_));
for (size_t i = 0; i < kMaximumClientCount; ++i) {
unused_clients_.push(i);
@ -80,47 +60,9 @@ AudioSystem::~AudioSystem() {
CloseHandle(shutdown_event_);
}
void av_log_callback(void *avcl, int level, const char *fmt, va_list va) {
StringBuffer buff;
buff.AppendVarargs(fmt, va);
xe::log_line('i', "libav: %s", buff.GetString());
}
X_STATUS AudioSystem::Setup() {
processor_ = emulator_->processor();
// Let the processor know we want register access callbacks.
emulator_->memory()->AddVirtualMappedRange(
0x7FEA0000, 0xFFFF0000, 0x0000FFFF, this,
reinterpret_cast<MMIOReadCallback>(MMIOReadRegisterThunk),
reinterpret_cast<MMIOWriteCallback>(MMIOWriteRegisterThunk));
// Setup XMA contexts ptr.
registers_.xma_context_array_ptr = memory()->SystemHeapAlloc(
sizeof(XMAContextData) * kXmaContextCount, 256, kSystemHeapPhysical);
// Add all contexts to the free list.
for (int i = kXmaContextCount - 1; i >= 0; --i) {
uint32_t ptr = registers_.xma_context_array_ptr + i * sizeof(XMAContextData);
XMAContext& context = xma_context_array_[i];
// Initialize it
context.guest_ptr = ptr;
context.in_use = false;
context.kicked = false;
// Create a new decoder per context
// Needed because some data needs to be persisted across calls
// TODO: Need to destroy this on class destruction
context.decoder = new AudioDecoder();
context.decoder->Initialize();
}
registers_.next_context = 1;
// Setup libav logging callback
av_log_set_callback(av_log_callback);
worker_running_ = true;
worker_thread_ =
kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
@ -131,16 +73,6 @@ X_STATUS AudioSystem::Setup() {
worker_thread_->set_name("Audio Worker");
worker_thread_->Create();
decoder_running_ = true;
decoder_thread_ =
kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
emulator()->kernel_state(), 128 * 1024, 0, [this]() {
DecoderThreadMain();
return 0;
}));
decoder_thread_->set_name("Audio Decoder");
decoder_thread_->Create();
return X_STATUS_SUCCESS;
}
@ -197,34 +129,6 @@ void AudioSystem::WorkerThreadMain() {
// TODO(benvanik): call module API to kill?
}
void AudioSystem::DecoderThreadMain() {
while (decoder_running_) {
// Wait for a kick from WriteRegister.
//decoder_fence_.Wait();
// Check to see if we're supposed to exit
if (!decoder_running_) {
break;
}
// Okay, let's loop through XMA contexts to find ones we need to decode!
for (uint32_t n = 0; n < kXmaContextCount; n++) {
XMAContext& context = xma_context_array_[n];
if (context.in_use && context.kicked) {
context.lock.lock();
context.kicked = false;
auto context_ptr = memory()->TranslateVirtual(context.guest_ptr);
XMAContextData data(context_ptr);
ProcessXmaContext(context, data);
data.Store(context_ptr);
context.lock.unlock();
}
}
}
}
void AudioSystem::Initialize() {}
void AudioSystem::Shutdown() {
@ -232,66 +136,6 @@ void AudioSystem::Shutdown() {
SetEvent(shutdown_event_);
worker_thread_->Wait(0, 0, 0, nullptr);
worker_thread_.reset();
decoder_running_ = false;
decoder_fence_.Signal();
worker_thread_.reset();
memory()->SystemHeapFree(registers_.xma_context_array_ptr);
}
uint32_t AudioSystem::AllocateXmaContext() {
std::lock_guard<xe::mutex> lock(lock_);
for (uint32_t n = 0; n < kXmaContextCount; n++) {
XMAContext& context = xma_context_array_[n];
if (!context.in_use) {
context.in_use = true;
return context.guest_ptr;
}
}
return 0;
}
void AudioSystem::ReleaseXmaContext(uint32_t guest_ptr) {
std::lock_guard<xe::mutex> lock(lock_);
// Find it in the list.
for (uint32_t n = 0; n < kXmaContextCount; n++) {
XMAContext& context = xma_context_array_[n];
if (context.guest_ptr == guest_ptr) {
// Found it!
// Lock it in case the decoder thread is working on it now
context.lock.lock();
context.in_use = false;
auto context_ptr = memory()->TranslateVirtual(guest_ptr);
std::memset(context_ptr, 0, sizeof(XMAContextData)); // Zero it.
context.decoder->DiscardPacket();
context.lock.unlock();
break;
}
}
}
bool AudioSystem::BlockOnXmaContext(uint32_t guest_ptr, bool poll) {
std::lock_guard<xe::mutex> lock(lock_);
for (uint32_t n = 0; n < kXmaContextCount; n++) {
XMAContext& context = xma_context_array_[n];
if (context.guest_ptr == guest_ptr) {
if (!context.lock.try_lock()) {
if (poll) {
return false;
}
context.lock.lock();
}
context.lock.unlock();
return true;
}
}
return true;
}
X_STATUS AudioSystem::RegisterClient(uint32_t callback, uint32_t callback_arg,
@ -353,304 +197,5 @@ void AudioSystem::UnregisterClient(size_t index) {
assert_true(wait_result == WAIT_TIMEOUT);
}
// Decodes audio for one XMA context until its output ring buffer is full or
// the input runs dry.
//   context - host-side state for the context (owns the decoder).
//   data    - host copy of the guest context; offsets and valid flags are
//             updated in place. The caller writes it back to guest memory.
// Returns immediately if the output buffer is not flagged valid; otherwise the
// output buffer valid flag is cleared before returning.
void AudioSystem::ProcessXmaContext(XMAContext& context, XMAContextData& data) {
SCOPE_profile_cpu_f("apu");
// What I see:
// XMA outputs 2 bytes per sample
// 512 samples per frame (128 per subframe)
// Max output size is data.output_buffer_block_count * 256
// This decoder is fed packets (max 4095 per buffer)
// Packets contain "some" frames
// 32bit header (big endian)
// Frames are the smallest thing the SPUs can decode.
// They usually can span packets (libav handles this)
// Sample rates (data.sample_rate):
// 0 - 24 kHz ?
// 1 - 32 kHz
// 2 - 44.1 kHz ?
// 3 - 48 kHz ?
// SPUs also support stereo decoding. (data.is_stereo)
// Check the output buffer - we cannot decode anything else if it's
// unavailable.
if (!data.output_buffer_valid) {
return;
}
// Translate this for future use.
uint8_t* output_buffer = memory()->TranslatePhysical(data.output_buffer_ptr);
// Output buffers are in raw PCM samples, 256 bytes per block.
// Output buffer is a ring buffer. We need to write from the write offset
// to the read offset.
uint32_t output_capacity = data.output_buffer_block_count * 256;
uint32_t output_read_offset = data.output_buffer_read_offset * 256;
uint32_t output_write_offset = data.output_buffer_write_offset * 256;
RingBuffer output_rb(output_buffer, output_capacity);
output_rb.set_read_offset(output_read_offset);
output_rb.set_write_offset(output_write_offset);
size_t output_remaining_bytes = output_rb.write_count();
// Decode until we can't write any more data.
while (output_remaining_bytes > 0) {
// This'll copy audio samples into the output buffer.
// The samples need to be 2 bytes long!
// Copies one frame at a time, so keep calling this until size == 0
int read_bytes = 0;
int decode_attempts_remaining = 3;
// Decoded samples are staged here, then copied into the ring buffer.
uint8_t work_buffer[XMAContextData::kOutputMaxSizeBytes];
while (decode_attempts_remaining) {
read_bytes = context.decoder->DecodePacket(work_buffer, 0,
output_remaining_bytes);
if (read_bytes >= 0) {
//assert_true((read_bytes % 256) == 0);
auto written_bytes = output_rb.Write(work_buffer, read_bytes);
assert_true(read_bytes == written_bytes);
// Ok.
break;
} else {
// Sometimes the decoder will fail on a packet. I think it's
// looking for cross-packet frames and failing. If you run it again
// on the same packet it'll work though.
--decode_attempts_remaining;
}
}
// All retries used up: read_bytes still holds the last (negative) error.
if (!decode_attempts_remaining) {
XELOGAPU("AudioSystem: libav failed to decode packet (returned %.8X)", -read_bytes);
// Failed out.
if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
// There's new data available - maybe we'll be ok if we decode it?
read_bytes = 0;
context.decoder->DiscardPacket();
} else {
// No data and hosed - bail.
break;
}
}
// Publish the new write position in 256-byte blocks.
data.output_buffer_write_offset = output_rb.write_offset() / 256;
output_remaining_bytes -= read_bytes;
// If we need more data and the input buffers have it, grab it.
if (read_bytes) {
// Haven't finished with current packet.
continue;
} else if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
// Done with previous packet, so grab a new one.
int ret = PrepareXMAPacket(context, data);
if (ret <= 0) {
// No more data (but may have prepared a packet)
data.input_buffer_0_valid = 0;
data.input_buffer_1_valid = 0;
}
} else {
// Decoder is out of data and there's no more to give.
break;
}
}
// The game will kick us again with a new output buffer later.
data.output_buffer_valid = 0;
}
// Hands the next input packet of |data| to the context's decoder and advances
// the guest-visible input read offset by one packet.
//
// Returns:
//   > 0  a packet was prepared; the value is the number of input bytes still
//        left after it.
//   0    a packet was prepared and it was the last of the input.
//   -1   no packet was prepared (read offset is at/past the end of the input,
//        or the buffer that should hold the packet is not valid).
int AudioSystem::PrepareXMAPacket(XMAContext &context, XMAContextData &data) {
  // data.sample_rate is a 2-bit enum, so it always indexes this table.
  static const int kSampleRates[4] = {24000, 32000, 44100, 48000};
  const int sample_rate = kSampleRates[data.sample_rate];
  const int channels = data.is_stereo ? 2 : 1;

  // Translate pointers for future use. A buffer that is not flagged valid
  // is never translated.
  uint8_t* in0 = data.input_buffer_0_valid
                     ? memory()->TranslatePhysical(data.input_buffer_0_ptr)
                     : nullptr;
  uint8_t* in1 = data.input_buffer_1_valid
                     ? memory()->TranslatePhysical(data.input_buffer_1_ptr)
                     : nullptr;

  // Buffer sizes are stored as packet counts, so expand by packet size.
  const uint32_t input_size_0_bytes =
      data.input_buffer_0_packet_count * XMAContextData::kBytesPerPacket;
  const uint32_t input_size_1_bytes =
      data.input_buffer_1_packet_count * XMAContextData::kBytesPerPacket;
  const uint32_t input_size_bytes = input_size_0_bytes + input_size_1_bytes;

  // Input read offset is in bits. Typically starts at 32 (4 bytes).
  // "Sequence" offset - used internally for WMA Pro decoder.
  const uint32_t seq_offset_bytes =
      (data.input_buffer_read_offset & ~0x7FF) / 8;
  if (seq_offset_bytes >= input_size_bytes) {
    // No more data available and no packet prepared.
    return -1;
  }

  // Select the buffer that contains the read offset.
  uint32_t input_offset_bytes = seq_offset_bytes;
  uint8_t* input_buffer = in0;
  if (seq_offset_bytes >= input_size_0_bytes) {
    // Offset lies past buffer 0, so read from buffer 1.
    // TODO: This needs testing.
    input_offset_bytes -= input_size_0_bytes;
    input_buffer = in1;
  }
  if (!input_buffer) {
    // The buffer holding this offset is not valid; there is nothing safe to
    // read from, so report "no packet prepared" instead of offsetting null.
    return -1;
  }

  // Still have data to read.
  uint8_t* packet = input_buffer + input_offset_bytes;
  assert_true(input_offset_bytes % 2048 == 0);
  context.decoder->PreparePacket(packet, seq_offset_bytes,
                                 XMAContextData::kBytesPerPacket, sample_rate,
                                 channels);
  data.input_buffer_read_offset += XMAContextData::kBytesPerPacket * 8;

  // Compare before subtracting: the count is unsigned, so subtracting a full
  // packet from a smaller remainder would wrap instead of going negative.
  const uint32_t input_remaining_bytes = input_size_bytes - seq_offset_bytes;
  if (input_remaining_bytes <= XMAContextData::kBytesPerPacket) {
    // Used the last of the data but prepared a packet.
    return 0;
  }
  return static_cast<int>(input_remaining_bytes -
                          XMAContextData::kBytesPerPacket);
}
// free60 may be useful here, however it looks like it's using a different
// piece of hardware:
// https://github.com/Free60Project/libxenon/blob/master/libxenon/drivers/xenon_sound/sound.c
// MMIO read handler for the audio register range. Only the low 16 bits of
// |addr| select the register; the result is returned byte-swapped.
uint64_t AudioSystem::ReadRegister(uint32_t addr) {
  const uint32_t r = addr & 0xFFFF;
  XELOGAPU("ReadRegister(%.4X)", r);

  // Known reads:
  //   1800h - read on startup and stored -- context? buffers?
  //   1818h - read during a lock?
  assert_true(r % 4 == 0);
  uint32_t result = register_file_[r / 4];

  // 1818h reports the hardware ID of the context being processed. If bit
  // 200h is set, the locking code will possibly collide on hardware IDs and
  // error out, so we should never set it (I think?).
  if (r == 0x1818) {
    // Hand out a rotating number so games never see a stuck XMA context.
    const uint32_t reported = registers_.next_context;
    registers_.current_context = reported;
    registers_.next_context = (reported + 1) % kXmaContextCount;
    result = reported;
  }

  return xe::byte_swap(result);
}
void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) {
SCOPE_profile_cpu_f("apu");
uint32_t r = addr & 0xFFFF;
value = xe::byte_swap(uint32_t(value));
XELOGAPU("WriteRegister(%.4X, %.8X)", r, value);
// 1804h is written to with 0x02000000 and 0x03000000 around a lock operation
assert_true(r % 4 == 0);
register_file_[r / 4] = uint32_t(value);
if (r >= 0x1940 && r <= 0x1940 + 9 * 4) {
// Context kick command.
// This will kick off the given hardware contexts.
// Basically, this kicks the SPU and says "hey, decode that audio!"
// XMAEnableContext
// The context ID is a bit in the range of the entire context array.
for (int i = 0; value && i < 32; ++i) {
if (value & 1) {
uint32_t context_id = i + (r - 0x1940) / 4 * 32;
XMAContext& context = xma_context_array_[context_id];
context.lock.lock();
auto context_ptr = memory()->TranslateVirtual(context.guest_ptr);
XMAContextData data(context_ptr);
XELOGAPU("AudioSystem: kicking context %d (%d/%d bytes)", context_id,
(data.input_buffer_read_offset & ~0x7FF) / 8,
(data.input_buffer_0_packet_count + data.input_buffer_1_packet_count)
* XMAContextData::kBytesPerPacket);
// Reset valid flags so our audio decoder knows to process this one.
data.input_buffer_0_valid = data.input_buffer_0_ptr != 0;
data.input_buffer_1_valid = data.input_buffer_1_ptr != 0;
data.Store(context_ptr);
context.kicked = true;
context.lock.unlock();
}
value >>= 1;
}
// Signal the decoder thread to start processing.
decoder_fence_.Signal();
} else if (r >= 0x1A40 && r <= 0x1A40 + 9 * 4) {
// Context lock command.
// This requests a lock by flagging the context.
// XMADisableContext
for (int i = 0; value && i < 32; ++i) {
if (value & 1) {
uint32_t context_id = i + (r - 0x1A40) / 4 * 32;
XELOGAPU("AudioSystem: set context lock %d", context_id);
}
value >>= 1;
}
// Signal the decoder thread to start processing.
decoder_fence_.Signal();
} else if (r >= 0x1A80 && r <= 0x1A80 + 9 * 4) {
// Context clear command.
// This will reset the given hardware contexts.
for (int i = 0; value && i < 32; ++i) {
if (value & 1) {
uint32_t context_id = i + (r - 0x1A80) / 4 * 32;
XMAContext& context = xma_context_array_[context_id];
XELOGAPU("AudioSystem: reset context %d", context_id);
context.lock.lock();
auto context_ptr = memory()->TranslateVirtual(context.guest_ptr);
XMAContextData data(context_ptr);
context.decoder->DiscardPacket();
data.input_buffer_0_valid = 0;
data.input_buffer_1_valid = 0;
data.output_buffer_valid = 0;
data.output_buffer_read_offset = 0;
data.output_buffer_write_offset = 0;
data.Store(context_ptr);
context.lock.unlock();
}
value >>= 1;
}
} else {
value = value;
}
}
} // namespace apu
} // namespace xe

View File

@ -27,99 +27,8 @@ namespace xe {
namespace apu {
class AudioDriver;
class AudioDecoder;
// This is stored in guest space in big-endian order.
// We load and swap the whole thing to splat here so that we can
// use bitfields.
// This could be important:
// http://www.fmod.org/questions/question/forum-15859
// Appears to be dumped in order (for the most part)
// http://pastebin.com/9amqJ2kQ
// Host-side, bitfield view of one guest XMA context (16 DWORDs; the size is
// pinned to 64 bytes by the static_assert_size that follows the struct).
// Construct it from guest memory, edit the fields, then Store() it back.
struct XMAContextData {
// Size of one input packet in bytes (packet counts below are in these units).
static const uint32_t kBytesPerPacket = 2048;
static const uint32_t kSamplesPerFrame = 512;
static const uint32_t kSamplesPerSubframe = 128;
// Output buffer sizes/offsets below are counted in blocks of this many bytes.
static const uint32_t kOutputBytesPerBlock = 256;
// 31 is the largest value the 5-bit output_buffer_block_count can hold.
static const uint32_t kOutputMaxSizeBytes = 31 * kOutputBytesPerBlock;
// DWORD 0
uint32_t input_buffer_0_packet_count : 12; // XMASetInputBuffer0, number of
// 2KB packets. Max 4095 packets.
// These packets form a block.
uint32_t loop_count : 8; // +12bit, XMASetLoopData NumLoops
uint32_t input_buffer_0_valid : 1; // +20bit, XMAIsInputBuffer0Valid
uint32_t input_buffer_1_valid : 1; // +21bit, XMAIsInputBuffer1Valid
uint32_t output_buffer_block_count : 5; // +22bit SizeWrite 256byte blocks
uint32_t
output_buffer_write_offset : 5; // +27bit, XMAGetOutputBufferWriteOffset
// AKA OffsetWrite
// DWORD 1
uint32_t input_buffer_1_packet_count : 12; // XMASetInputBuffer1, number of
// 2KB packets. Max 4095 packets.
// These packets form a block.
uint32_t loop_subframe_end : 2; // +12bit, XMASetLoopData
uint32_t unk_dword_1_a : 3; // ? might be loop_subframe_skip
uint32_t loop_subframe_skip : 3; // +17bit, XMASetLoopData might be
// subframe_decode_count
uint32_t subframe_decode_count : 4; // +20bit might be subframe_skip_count
uint32_t unk_dword_1_b : 3; // ? NumSubframesToSkip/NumChannels(?)
uint32_t sample_rate : 2; // +27bit enum of sample rates
uint32_t is_stereo : 1; // +29bit
uint32_t unk_dword_1_c : 1; // +30bit
uint32_t output_buffer_valid : 1; // +31bit, XMAIsOutputBufferValid
// DWORD 2
uint32_t input_buffer_read_offset : 26; // XMAGetInputBufferReadOffset
uint32_t unk_dword_2 : 6; // ErrorStatus/ErrorSet (?)
// DWORD 3
uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset
uint32_t unk_dword_3 : 6; // ? ParserErrorStatus/ParserErrorSet(?)
// DWORD 4
uint32_t loop_end : 26; // XMASetLoopData LoopEndOffset
uint32_t packet_metadata : 5; // XMAGetPacketMetadata
uint32_t current_buffer : 1; // ?
// DWORD 5
uint32_t input_buffer_0_ptr; // physical address
// DWORD 6
uint32_t input_buffer_1_ptr; // physical address
// DWORD 7
uint32_t output_buffer_ptr; // physical address
// DWORD 8
uint32_t overlap_add_ptr; // PtrOverlapAdd(?)
// DWORD 9
// +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead
uint32_t output_buffer_read_offset : 5;
uint32_t unk_dword_9 : 27; // StopWhenDone/InterruptWhenDone(?)
// DWORD 10-15
uint32_t unk_dwords_10_15[6]; // reserved?
// Loads the whole structure from |ptr| as sizeof(XMAContextData) / 4 32-bit
// words via xe::copy_and_swap_32_aligned, so the bitfields can be read on
// the host.
XMAContextData(const void* ptr) {
xe::copy_and_swap_32_aligned(reinterpret_cast<uint32_t*>(this),
reinterpret_cast<const uint32_t*>(ptr),
sizeof(XMAContextData) / 4);
}
// Writes the whole structure back to |ptr| using the same word-wise
// copy-and-swap as the constructor.
void Store(void* ptr) {
xe::copy_and_swap_32_aligned(reinterpret_cast<uint32_t*>(ptr),
reinterpret_cast<const uint32_t*>(this),
sizeof(XMAContextData) / 4);
}
};
static_assert_size(XMAContextData, 64);
class AudioSystem {
protected:
struct XMAContext;
public:
virtual ~AudioSystem();
@ -130,13 +39,6 @@ class AudioSystem {
virtual X_STATUS Setup();
virtual void Shutdown();
uint32_t xma_context_array_ptr() const {
return registers_.xma_context_array_ptr;
}
uint32_t AllocateXmaContext();
void ReleaseXmaContext(uint32_t guest_ptr);
bool BlockOnXmaContext(uint32_t guest_ptr, bool poll);
X_STATUS RegisterClient(uint32_t callback, uint32_t callback_arg,
size_t* out_index);
void UnregisterClient(size_t index);
@ -146,9 +48,6 @@ class AudioSystem {
AudioDriver** out_driver) = 0;
virtual void DestroyDriver(AudioDriver* driver) = 0;
virtual uint64_t ReadRegister(uint32_t addr);
virtual void WriteRegister(uint32_t addr, uint64_t value);
// TODO(gibbed): respect XAUDIO2_MAX_QUEUED_BUFFERS somehow (ie min(64, XAUDIO2_MAX_QUEUED_BUFFERS))
static const size_t kMaximumQueuedFrames = 64;
@ -157,19 +56,6 @@ class AudioSystem {
private:
void WorkerThreadMain();
void DecoderThreadMain();
void ProcessXmaContext(XMAContext& context, XMAContextData& data);
int PrepareXMAPacket(XMAContext& context, XMAContextData& data);
static uint64_t MMIOReadRegisterThunk(void* ppc_context, AudioSystem* as,
uint32_t addr) {
return as->ReadRegister(addr);
}
static void MMIOWriteRegisterThunk(void* ppc_context, AudioSystem* as,
uint32_t addr, uint64_t value) {
as->WriteRegister(addr, value);
}
protected:
AudioSystem(Emulator* emulator);
@ -181,47 +67,8 @@ class AudioSystem {
std::atomic<bool> worker_running_;
kernel::object_ref<kernel::XHostThread> worker_thread_;
std::atomic<bool> decoder_running_;
kernel::object_ref<kernel::XHostThread> decoder_thread_;
xe::threading::Fence decoder_fence_;
xe::mutex lock_;
// Stored little endian, accessed through 0x7FEA....
union {
struct {
union {
struct {
uint8_t ignored0[0x1800];
// 1800h; points to guest-space physical block of 320 contexts.
uint32_t xma_context_array_ptr;
};
struct {
uint8_t ignored1[0x1818];
// 1818h; current context ID.
uint32_t current_context;
// 181Ch; next context ID to process.
uint32_t next_context;
};
};
} registers_;
uint32_t register_file_[0xFFFF / 4];
};
struct XMAContext {
uint32_t guest_ptr;
xe::mutex lock;
bool in_use;
bool kicked;
AudioDecoder* decoder;
};
static const uint32_t kXmaContextCount = 320; // // Total number of XMA contexts available.
XMAContext xma_context_array_[kXmaContextCount];
std::vector<uint32_t> xma_context_free_list_;
std::vector<uint32_t> xma_context_used_list_; // XMA contexts in use
static const size_t kMaximumClientCount = 8;
struct {

View File

@ -7,13 +7,12 @@
******************************************************************************
*/
#include "xenia/apu/audio_decoder.h"
#include "xenia/apu/xma_context.h"
#include "xenia/apu/xma_decoder.h"
#include "xenia/base/logging.h"
#include <cstring>
#include "xenia/apu/audio_system.h"
#include "xenia/base/logging.h"
extern "C" {
#include "libavcodec/avcodec.h"
}
@ -24,13 +23,13 @@ extern "C" {
namespace xe {
namespace apu {
AudioDecoder::AudioDecoder()
XmaContext::XmaContext()
: codec_(nullptr),
context_(nullptr),
decoded_frame_(nullptr),
packet_(nullptr) {}
AudioDecoder::~AudioDecoder() {
XmaContext::~XmaContext() {
if (context_) {
if (context_->extradata) {
delete context_->extradata;
@ -48,7 +47,7 @@ AudioDecoder::~AudioDecoder() {
}
}
int AudioDecoder::Initialize() {
int XmaContext::Initialize() {
static bool avcodec_initialized = false;
if (!avcodec_initialized) {
avcodec_register_all();
@ -77,7 +76,7 @@ int AudioDecoder::Initialize() {
// Initialize these to 0. They'll actually be set later.
context_->channels = 0;
context_->sample_rate = 0;
context_->block_align = XMAContextData::kBytesPerPacket;
context_->block_align = XMA_CONTEXT_DATA::kBytesPerPacket;
// Extra data passed to the decoder
context_->extradata_size = 18;
@ -86,7 +85,7 @@ int AudioDecoder::Initialize() {
// Current frame stuff whatever
// samples per frame * 2 max channels * output bytes
current_frame_ =
new uint8_t[XMAContextData::kSamplesPerFrame * 2 * 2];
new uint8_t[XMA_CONTEXT_DATA::kSamplesPerFrame * 2 * 2];
current_frame_pos_ = 0;
frame_samples_size_ = 0;
@ -99,9 +98,9 @@ int AudioDecoder::Initialize() {
return 0;
}
int AudioDecoder::PreparePacket(uint8_t *input, size_t seq_offset, size_t size,
int XmaContext::PreparePacket(uint8_t *input, size_t seq_offset, size_t size,
int sample_rate, int channels) {
if (size != XMAContextData::kBytesPerPacket) {
if (size != XMA_CONTEXT_DATA::kBytesPerPacket) {
// Invalid packet size!
assert_always();
return 1;
@ -118,7 +117,7 @@ int AudioDecoder::PreparePacket(uint8_t *input, size_t seq_offset, size_t size,
(*((int *)packet_data_) & 0xFFFEFF08);
packet_->data = packet_data_;
packet_->size = XMAContextData::kBytesPerPacket;
packet_->size = XMA_CONTEXT_DATA::kBytesPerPacket;
// Re-initialize the context with new sample rate and channels
if (context_->sample_rate != sample_rate || context_->channels != channels) {
@ -137,7 +136,7 @@ int AudioDecoder::PreparePacket(uint8_t *input, size_t seq_offset, size_t size,
return 0;
}
void AudioDecoder::DiscardPacket() {
void XmaContext::DiscardPacket() {
if (packet_->size > 0 || current_frame_pos_ != frame_samples_size_) {
packet_->data = 0;
packet_->size = 0;
@ -145,7 +144,7 @@ void AudioDecoder::DiscardPacket() {
}
}
int AudioDecoder::DecodePacket(uint8_t *output, size_t output_offset,
int XmaContext::DecodePacket(uint8_t *output, size_t output_offset,
size_t output_size) {
size_t to_copy = 0;
size_t original_offset = output_offset;
@ -180,7 +179,7 @@ int AudioDecoder::DecodePacket(uint8_t *output, size_t output_offset,
// Successfully decoded a frame
if (got_frame) {
// Validity checks.
if (decoded_frame_->nb_samples > XMAContextData::kSamplesPerFrame) {
if (decoded_frame_->nb_samples > XMA_CONTEXT_DATA::kSamplesPerFrame) {
return -2;
} else if (context_->sample_fmt != AV_SAMPLE_FMT_FLTP) {
return -3;

169
src/xenia/apu/xma_context.h Normal file
View File

@ -0,0 +1,169 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_APU_XMA_CONTEXT_H_
#define XENIA_APU_XMA_CONTEXT_H_
#include <atomic>
#include <mutex>
#include <queue>
#include "xenia/emulator.h"
#include "xenia/xbox.h"
// XMA audio format:
// From research, XMA appears to be based on WMA Pro with
// a few (very slight) modifications.
// XMA2 is fully backwards-compatible with XMA1.
// Helpful resources:
// https://github.com/koolkdev/libertyv/blob/master/libav_wrapper/xma2dec.c
// http://hcs64.com/mboard/forum.php?showthread=14818
// https://github.com/hrydgard/minidx9/blob/master/Include/xma2defs.h
// Forward declarations
struct AVCodec;
struct AVCodecContext;
struct AVFrame;
struct AVPacket;
namespace xe {
namespace apu {
// This is stored in guest space in big-endian order.
// We load and swap the whole thing to splat here so that we can
// use bitfields.
// This could be important:
// http://www.fmod.org/questions/question/forum-15859
// Appears to be dumped in order (for the most part)
// Host-side, bitfield view of one guest XMA context (16 DWORDs; the size is
// pinned to 64 bytes by the static_assert_size that follows the struct).
// Construct it from guest memory, edit the fields, then Store() it back.
struct XMA_CONTEXT_DATA {
// Size of one input packet in bytes (packet counts below are in these units).
static const uint32_t kBytesPerPacket = 2048;
static const uint32_t kSamplesPerFrame = 512;
static const uint32_t kSamplesPerSubframe = 128;
// Output buffer sizes/offsets below are counted in blocks of this many bytes.
static const uint32_t kOutputBytesPerBlock = 256;
// 31 is the largest value the 5-bit output_buffer_block_count can hold.
static const uint32_t kOutputMaxSizeBytes = 31 * kOutputBytesPerBlock;
// DWORD 0
uint32_t input_buffer_0_packet_count : 12; // XMASetInputBuffer0, number of
// 2KB packets. Max 4095 packets.
// These packets form a block.
uint32_t loop_count : 8; // +12bit, XMASetLoopData NumLoops
uint32_t input_buffer_0_valid : 1; // +20bit, XMAIsInputBuffer0Valid
uint32_t input_buffer_1_valid : 1; // +21bit, XMAIsInputBuffer1Valid
uint32_t output_buffer_block_count : 5; // +22bit SizeWrite 256byte blocks
uint32_t output_buffer_write_offset : 5; // +27bit
// XMAGetOutputBufferWriteOffset
// AKA OffsetWrite
// DWORD 1
uint32_t input_buffer_1_packet_count : 12; // XMASetInputBuffer1, number of
// 2KB packets. Max 4095 packets.
// These packets form a block.
uint32_t loop_subframe_end : 2; // +12bit, XMASetLoopData
uint32_t unk_dword_1_a : 3; // ? might be loop_subframe_skip
uint32_t loop_subframe_skip : 3; // +17bit, XMASetLoopData might be
// subframe_decode_count
uint32_t subframe_decode_count : 4; // +20bit might be subframe_skip_count
uint32_t unk_dword_1_b : 3; // ? NumSubframesToSkip/NumChannels(?)
uint32_t sample_rate : 2; // +27bit enum of sample rates
uint32_t is_stereo : 1; // +29bit
uint32_t unk_dword_1_c : 1; // +30bit
uint32_t output_buffer_valid : 1; // +31bit, XMAIsOutputBufferValid
// DWORD 2
uint32_t input_buffer_read_offset : 26; // XMAGetInputBufferReadOffset
uint32_t unk_dword_2 : 6; // ErrorStatus/ErrorSet (?)
// DWORD 3
uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset
uint32_t unk_dword_3 : 6; // ? ParserErrorStatus/ParserErrorSet(?)
// DWORD 4
uint32_t loop_end : 26; // XMASetLoopData LoopEndOffset
uint32_t packet_metadata : 5; // XMAGetPacketMetadata
uint32_t current_buffer : 1; // ?
// DWORD 5
uint32_t input_buffer_0_ptr; // physical address
// DWORD 6
uint32_t input_buffer_1_ptr; // physical address
// DWORD 7
uint32_t output_buffer_ptr; // physical address
// DWORD 8
uint32_t overlap_add_ptr; // PtrOverlapAdd(?)
// DWORD 9
// +0bit, XMAGetOutputBufferReadOffset AKA WriteBufferOffsetRead
uint32_t output_buffer_read_offset : 5;
uint32_t unk_dword_9 : 27; // StopWhenDone/InterruptWhenDone(?)
// DWORD 10-15
uint32_t unk_dwords_10_15[6]; // reserved?
// Loads the whole structure from |ptr| as sizeof(XMA_CONTEXT_DATA) / 4
// 32-bit words via xe::copy_and_swap, so the bitfields can be read on the
// host.
XMA_CONTEXT_DATA(const void* ptr) {
xe::copy_and_swap(reinterpret_cast<uint32_t*>(this),
reinterpret_cast<const uint32_t*>(ptr),
sizeof(XMA_CONTEXT_DATA) / 4);
}
// Writes the whole structure back to |ptr| using the same word-wise
// copy-and-swap as the constructor.
void Store(void* ptr) {
xe::copy_and_swap(reinterpret_cast<uint32_t*>(ptr),
reinterpret_cast<const uint32_t*>(this),
sizeof(XMA_CONTEXT_DATA) / 4);
}
};
static_assert_size(XMA_CONTEXT_DATA, 64);
// Host-side state for one XMA context: the address of its guest structure,
// its allocation/kick flags, and the libav objects used to decode its input
// packets.
class XmaContext {
 public:
  XmaContext();
  ~XmaContext();

  int Initialize();
  // Stages `size` bytes of packet data from `input` for decoding.
  int PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
                    int sample_rate, int channels);
  // Drops whatever packet is currently staged.
  void DiscardPacket();
  // Decodes into `output`; XmaDecoder treats a negative result as a failure.
  int DecodePacket(uint8_t* output, size_t offset, size_t size);

  uint32_t guest_ptr() const { return guest_ptr_; }
  xe::mutex& lock() { return lock_; }  // TODO(gibbed): remove this
  bool in_use() const { return in_use_.load(); }
  bool kicked() const { return kicked_.load(); }

  void set_guest_ptr(uint32_t guest_ptr) { guest_ptr_ = guest_ptr; }
  void set_in_use(bool in_use) { in_use_.store(in_use); }
  void set_kicked(bool kicked) { kicked_.store(kicked); }

 private:
  // Guest address of the XMA_CONTEXT_DATA this object shadows.
  uint32_t guest_ptr_;
  xe::mutex lock_;
  // These two flags are polled by the decoder worker thread without holding
  // lock_ while other threads set them, so they must be atomic to avoid a
  // data race.
  std::atomic<bool> in_use_;
  std::atomic<bool> kicked_;

  // libav structures
  AVCodec* codec_;
  AVCodecContext* context_;
  AVFrame* decoded_frame_;
  AVPacket* packet_;

  size_t current_frame_pos_;
  uint8_t* current_frame_;
  uint32_t frame_samples_size_;

  // Holds one input packet's worth of data.
  uint8_t packet_data_[XMA_CONTEXT_DATA::kBytesPerPacket];
};
} // namespace apu
} // namespace xe
#endif // XENIA_APU_XMA_CONTEXT_H_

View File

@ -0,0 +1,496 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/apu/audio_system.h"
#include "xenia/apu/xma_context.h"
#include "xenia/apu/xma_decoder.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/ring_buffer.h"
#include "xenia/base/string_buffer.h"
#include "xenia/cpu/processor.h"
#include "xenia/cpu/thread_state.h"
#include "xenia/emulator.h"
#include "xenia/kernel/objects/xthread.h"
#include "xenia/profiling.h"
extern "C" {
#include "libavutil/log.h"
}
// As with normal Microsoft, there are like twelve different ways to access
// the audio APIs. Early games use XMA*() methods almost exclusively to touch
// decoders. Later games use XAudio*() and direct memory writes to the XMA
// structures (as opposed to the XMA* calls), meaning that we have to support
// both.
//
// For ease of implementation, XMA decoding is handled here in XmaDecoder.
// The XMA*() functions just manipulate the XMA context structures in guest
// memory and write the decoder registers, then let the normal XmaDecoder
// handling take it, to prevent duplicate implementations. They can be found
// in xboxkrnl_audio_xma.cc
//
// XMA details:
// https://devel.nuclex.org/external/svn/directx/trunk/include/xma2defs.h
// https://github.com/gdawg/fsbext/blob/master/src/xma_header.h
//
// XAudio2 uses XMA under the covers, and seems to map with the same
// restrictions of frame/subframe/etc:
// https://msdn.microsoft.com/en-us/library/windows/desktop/microsoft.directx_sdk.xaudio2.xaudio2_buffer(v=vs.85).aspx
//
// XMA contexts are 64b in size and tight bitfields. They are in physical
// memory not usually available to games. Games will use MmMapIoSpace to get
// the 64b pointer in user memory so they can party on it. If the game doesn't
// do this, it's likely they are either passing the context to XAudio or
// using the XMA* functions.
namespace xe {
namespace apu {
using namespace xe::cpu;
// Captures the emulator and its memory. The processor pointer is filled in
// later by Setup(), so it starts out null rather than indeterminate.
XmaDecoder::XmaDecoder(Emulator* emulator)
    : emulator_(emulator),
      memory_(emulator->memory()),
      processor_(nullptr),
      worker_running_(false) {
  // ReadRegister() returns register file contents to the guest; zero the file
  // so registers that were never written read back as 0 instead of
  // uninitialized memory.
  std::memset(register_file_, 0, sizeof(register_file_));
}
// Nothing to release here; Shutdown() frees the guest context array.
XmaDecoder::~XmaDecoder() = default;
void av_log_callback(void *avcl, int level, const char *fmt, va_list va) {
StringBuffer buff;
buff.AppendVarargs(fmt, va);
xe::log_line('i', "libav: %s", buff.GetString());
}
// Wires the decoder into the emulator: installs the libav log callback, maps
// the XMA register window, allocates the guest context array, initializes
// every host context and starts the worker thread.
// Always returns X_STATUS_SUCCESS.
X_STATUS XmaDecoder::Setup() {
  processor_ = emulator_->processor();

  // Route libav log output through av_log_callback.
  av_log_set_callback(av_log_callback);

  // Register for MMIO callbacks on the register window at 0x7FEA0000.
  auto read_thunk = reinterpret_cast<MMIOReadCallback>(MMIOReadRegisterThunk);
  auto write_thunk =
      reinterpret_cast<MMIOWriteCallback>(MMIOWriteRegisterThunk);
  emulator_->memory()->AddVirtualMappedRange(0x7FEA0000, 0xFFFF0000,
                                             0x0000FFFF, this, read_thunk,
                                             write_thunk);

  // Allocate the guest array holding one XMA_CONTEXT_DATA per context and
  // publish its address through the register file.
  registers_.context_array_ptr = memory()->SystemHeapAlloc(
      sizeof(XMA_CONTEXT_DATA) * kContextCount, 256, kSystemHeapPhysical);

  // Bind each host context to its slot in the guest array (last to first).
  int index = kContextCount - 1;
  while (index >= 0) {
    XmaContext& slot = context_array_[index];
    uint32_t slot_ptr =
        registers_.context_array_ptr + index * sizeof(XMA_CONTEXT_DATA);
    slot.set_guest_ptr(slot_ptr);
    slot.Initialize();
    --index;
  }
  registers_.next_context = 1;

  // Start the decode worker.
  worker_running_ = true;
  auto worker_entry = [this]() {
    WorkerThreadMain();
    return 0;
  };
  worker_thread_ = kernel::object_ref<kernel::XHostThread>(
      new kernel::XHostThread(emulator()->kernel_state(), 128 * 1024, 0,
                              worker_entry));
  worker_thread_->set_name("XMA Decoder");
  worker_thread_->Create();

  return X_STATUS_SUCCESS;
}
void XmaDecoder::WorkerThreadMain() {
while (worker_running_) {
// Okay, let's loop through XMA contexts to find ones we need to decode!
for (uint32_t n = 0; n < kContextCount; n++) {
XmaContext& context = context_array_[n];
if (context.in_use() && context.kicked()) {
context.lock().lock();
context.set_kicked(false);
auto context_ptr = memory()->TranslateVirtual(context.guest_ptr());
XMA_CONTEXT_DATA data(context_ptr);
ProcessContext(context, data);
data.Store(context_ptr);
context.lock().unlock();
}
}
}
}
// Protected virtual hook declared in xma_decoder.h; this base implementation
// intentionally does nothing.
void XmaDecoder::Initialize() {}
// Stops the decoder: clears the worker's run flag, signals the worker fence,
// drops our reference to the worker thread and frees the guest context array.
void XmaDecoder::Shutdown() {
  worker_running_.store(false);
  worker_fence_.Signal();
  worker_thread_.reset();

  const uint32_t context_array = registers_.context_array_ptr;
  memory()->SystemHeapFree(context_array);
}
uint32_t XmaDecoder::AllocateContext() {
std::lock_guard<xe::mutex> lock(lock_);
for (uint32_t n = 0; n < kContextCount; n++) {
XmaContext& context = context_array_[n];
if (!context.in_use()) {
context.set_in_use(true);
return context.guest_ptr();
}
}
return 0;
}
// Returns the context whose guest structure lives at guest_ptr to the free
// pool: marks it unused, zeroes the guest structure and discards any staged
// packet. Does nothing if no context matches guest_ptr.
void XmaDecoder::ReleaseContext(uint32_t guest_ptr) {
  std::lock_guard<xe::mutex> lock(lock_);

  // Find it in the list.
  for (auto& context : context_array_) {
    if (context.guest_ptr() != guest_ptr) {
      continue;
    }

    // Found it!
    // Lock it in case the decoder thread is working on it now. The scoped
    // guard guarantees the context lock is released on every exit path.
    std::lock_guard<xe::mutex> context_lock(context.lock());
    context.set_in_use(false);

    auto context_ptr = memory()->TranslateVirtual(guest_ptr);
    std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA));  // Zero it.
    context.DiscardPacket();
    break;
  }
}
bool XmaDecoder::BlockOnContext(uint32_t guest_ptr, bool poll) {
std::lock_guard<xe::mutex> lock(lock_);
for (uint32_t n = 0; n < kContextCount; n++) {
XmaContext& context = context_array_[n];
if (context.guest_ptr() == guest_ptr) {
if (!context.lock().try_lock()) {
if (poll) {
return false;
}
context.lock().lock();
}
context.lock().unlock();
return true;
}
}
return true;
}
// Decodes as much audio as possible for one context into its guest output
// ring buffer.
//   context - host decoder state for the context being processed.
//   data    - host copy of the guest context structure; updated in place
//             (output_buffer_write_offset, input/output valid flags and, via
//             PreparePacket, input_buffer_read_offset). The caller stores it
//             back to guest memory.
// Returns immediately, leaving data untouched, when the output buffer is not
// flagged valid. Otherwise output_buffer_valid is cleared on the way out.
void XmaDecoder::ProcessContext(XmaContext& context, XMA_CONTEXT_DATA& data) {
SCOPE_profile_cpu_f("apu");
// What I see:
// XMA outputs 2 bytes per sample
// 512 samples per frame (128 per subframe)
// Max output size is data.output_buffer_block_count * 256
// This decoder is fed packets (max 4095 per buffer)
// Packets contain "some" frames
// 32bit header (big endian)
// Frames are the smallest thing the SPUs can decode.
// They usually can span packets (libav handles this)
// Sample rates (data.sample_rate):
// 0 - 24 kHz ?
// 1 - 32 kHz
// 2 - 44.1 kHz ?
// 3 - 48 kHz ?
// SPUs also support stereo decoding. (data.is_stereo)
// Check the output buffer - we cannot decode anything else if it's
// unavailable.
if (!data.output_buffer_valid) {
return;
}
// Translate this for future use.
uint8_t* output_buffer = memory()->TranslatePhysical(data.output_buffer_ptr);
// Output buffers are in raw PCM samples, 256 bytes per block.
// Output buffer is a ring buffer. We need to write from the write offset
// to the read offset.
// The context stores sizes and offsets in 256 byte blocks; convert to bytes.
uint32_t output_capacity = data.output_buffer_block_count * 256;
uint32_t output_read_offset = data.output_buffer_read_offset * 256;
uint32_t output_write_offset = data.output_buffer_write_offset * 256;
RingBuffer output_rb(output_buffer, output_capacity);
output_rb.set_read_offset(output_read_offset);
output_rb.set_write_offset(output_write_offset);
// NOTE(review): presumably write_count() never exceeds output_capacity,
// which is at most 31 * 256 bytes - confirm against RingBuffer.
size_t output_remaining_bytes = output_rb.write_count();
// Decode until we can't write any more data.
while (output_remaining_bytes > 0) {
// This'll copy audio samples into the output buffer.
// The samples need to be 2 bytes long!
// Copies one frame at a time, so keep calling this until size == 0
int read_bytes = 0;
int decode_attempts_remaining = 3;
// Staging buffer sized for the largest output buffer a context can describe.
uint8_t work_buffer[XMA_CONTEXT_DATA::kOutputMaxSizeBytes];
while (decode_attempts_remaining) {
read_bytes = context.DecodePacket(work_buffer, 0,
output_remaining_bytes);
// A non-negative result is the number of bytes produced (possibly 0).
if (read_bytes >= 0) {
//assert_true((read_bytes % 256) == 0);
auto written_bytes = output_rb.Write(work_buffer, read_bytes);
assert_true(read_bytes == written_bytes);
// Ok.
break;
} else {
// Sometimes the decoder will fail on a packet. I think it's
// looking for cross-packet frames and failing. If you run it again
// on the same packet it'll work though.
--decode_attempts_remaining;
}
}
// All attempts failed; read_bytes still holds the last negative result.
if (!decode_attempts_remaining) {
XELOGAPU("AudioSystem: libav failed to decode packet (returned %.8X)", -read_bytes);
// Failed out.
if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
// There's new data available - maybe we'll be ok if we decode it?
read_bytes = 0;
context.DiscardPacket();
} else {
// No data and hosed - bail.
break;
}
}
// Publish the new write position to the context, converted back to blocks.
data.output_buffer_write_offset = output_rb.write_offset() / 256;
output_remaining_bytes -= read_bytes;
// If we need more data and the input buffers have it, grab it.
if (read_bytes) {
// Haven't finished with current packet.
continue;
} else if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
// Done with previous packet, so grab a new one.
int ret = PreparePacket(context, data);
if (ret <= 0) {
// No more data (but may have prepared a packet)
data.input_buffer_0_valid = 0;
data.input_buffer_1_valid = 0;
}
} else {
// Decoder is out of data and there's no more to give.
break;
}
}
// The game will kick us again with a new output buffer later.
data.output_buffer_valid = 0;
}
// Stages the next input packet of a context for decoding.
//   context - host decoder state that receives the packet.
//   data    - host copy of the guest context; input_buffer_read_offset is
//             advanced by one packet when a packet is staged.
// Returns:
//   > 0  a packet was staged; value is the number of input bytes still unread.
//   0    a packet was staged and it was the last of the input.
//   -1   no packet was staged (input exhausted, or the buffer the read offset
//        falls in is not valid).
int XmaDecoder::PreparePacket(XmaContext& context, XMA_CONTEXT_DATA& data) {
  const uint32_t packet_bytes = XMA_CONTEXT_DATA::kBytesPerPacket;

  // Translate pointers for future use.
  uint8_t* in0 = data.input_buffer_0_valid
                     ? memory()->TranslatePhysical(data.input_buffer_0_ptr)
                     : nullptr;
  uint8_t* in1 = data.input_buffer_1_valid
                     ? memory()->TranslatePhysical(data.input_buffer_1_ptr)
                     : nullptr;

  // data.sample_rate is a 2-bit field, so it always indexes within the table.
  static const int kSampleRates[4] = {24000, 32000, 44100, 48000};
  int sample_rate = kSampleRates[data.sample_rate];
  int channels = data.is_stereo ? 2 : 1;

  // See if we've finished with the input.
  // Block count is in packets, so expand by packet size.
  uint32_t input_size_0_bytes = data.input_buffer_0_packet_count * packet_bytes;
  uint32_t input_size_1_bytes = data.input_buffer_1_packet_count * packet_bytes;

  // Total input size
  uint32_t input_size_bytes = input_size_0_bytes + input_size_1_bytes;

  // Input read offset is in bits. Typically starts at 32 (4 bytes).
  // "Sequence" offset - used internally for WMA Pro decoder.
  // Just the read offset.
  uint32_t seq_offset_bytes = (data.input_buffer_read_offset & ~0x7FF) / 8;
  if (seq_offset_bytes >= input_size_bytes) {
    // No more data available and no packet prepared.
    return -1;
  }

  // Setup input offset and input buffer.
  uint32_t input_offset_bytes = seq_offset_bytes;
  uint8_t* input_buffer = in0;
  if (seq_offset_bytes >= input_size_0_bytes) {
    // Size overlap, select input buffer 1.
    // TODO: This needs testing.
    input_offset_bytes -= input_size_0_bytes;
    input_buffer = in1;
  }
  if (!input_buffer) {
    // The read offset lands in a buffer that is not flagged valid, so there
    // is no memory to read from. Report "nothing prepared" instead of doing
    // pointer arithmetic on null.
    return -1;
  }

  // Still have data to read.
  uint8_t* packet = input_buffer + input_offset_bytes;
  assert_true(input_offset_bytes % packet_bytes == 0);
  context.PreparePacket(packet, seq_offset_bytes, packet_bytes, sample_rate,
                        channels);
  data.input_buffer_read_offset += packet_bytes * 8;

  // Compare before subtracting: the count is unsigned, so subtracting a full
  // packet from a shorter remainder would wrap around instead of going
  // negative.
  uint32_t input_remaining_bytes = input_size_bytes - seq_offset_bytes;
  if (input_remaining_bytes <= packet_bytes) {
    // Used the last of the data but prepared a packet
    return 0;
  }
  return static_cast<int>(input_remaining_bytes - packet_bytes);
}
// free60 may be useful here, however it looks like it's using a different
// piece of hardware:
// https://github.com/Free60Project/libxenon/blob/master/libxenon/drivers/xenon_sound/sound.c
// MMIO read handler for the XMA register window.
// Only the low 16 bits of addr select the register. Returns the register's
// stored value byte-swapped; register 1818h instead yields a context number
// that advances on every read.
uint64_t XmaDecoder::ReadRegister(uint32_t addr) {
  const uint32_t r = addr & 0xFFFF;
  XELOGAPU("ReadRegister(%.4X)", r);
  // 1800h is read on startup and stored -- context? buffers?
  // 1818h is read during a lock?

  assert_true(r % 4 == 0);
  uint32_t result = register_file_[r / 4];

  // 1818 is rotating context processing # set to hardware ID of context being
  // processed.
  // If bit 200h is set, the locking code will possibly collide on hardware IDs
  // and error out, so we should never set it (I think?).
  if (r == 0x1818) {
    // To prevent games from seeing a stuck XMA context, return a rotating
    // number
    const uint32_t reported = registers_.next_context;
    registers_.current_context = reported;
    registers_.next_context = (reported + 1) % kContextCount;
    result = registers_.current_context;
  }

  return xe::byte_swap(result);
}
void XmaDecoder::WriteRegister(uint32_t addr, uint64_t value) {
SCOPE_profile_cpu_f("apu");
uint32_t r = addr & 0xFFFF;
value = xe::byte_swap(uint32_t(value));
XELOGAPU("WriteRegister(%.4X, %.8X)", r, value);
// 1804h is written to with 0x02000000 and 0x03000000 around a lock operation
assert_true(r % 4 == 0);
register_file_[r / 4] = uint32_t(value);
if (r >= 0x1940 && r <= 0x1940 + 9 * 4) {
// Context kick command.
// This will kick off the given hardware contexts.
// Basically, this kicks the SPU and says "hey, decode that audio!"
// XMAEnableContext
// The context ID is a bit in the range of the entire context array.
for (int i = 0; value && i < 32; ++i) {
if (value & 1) {
uint32_t context_id = i + (r - 0x1940) / 4 * 32;
XmaContext& context = context_array_[context_id];
context.lock().lock();
auto context_ptr = memory()->TranslateVirtual(context.guest_ptr());
XMA_CONTEXT_DATA data(context_ptr);
XELOGAPU("AudioSystem: kicking context %d (%d/%d bytes)", context_id,
(data.input_buffer_read_offset & ~0x7FF) / 8,
(data.input_buffer_0_packet_count + data.input_buffer_1_packet_count)
* XMA_CONTEXT_DATA::kBytesPerPacket);
// Reset valid flags so our audio decoder knows to process this one.
data.input_buffer_0_valid = data.input_buffer_0_ptr != 0;
data.input_buffer_1_valid = data.input_buffer_1_ptr != 0;
data.Store(context_ptr);
context.set_kicked(true);
context.lock().unlock();
}
value >>= 1;
}
// Signal the decoder thread to start processing.
worker_fence_.Signal();
} else if (r >= 0x1A40 && r <= 0x1A40 + 9 * 4) {
// Context lock command.
// This requests a lock by flagging the context.
// XMADisableContext
for (int i = 0; value && i < 32; ++i) {
if (value & 1) {
uint32_t context_id = i + (r - 0x1A40) / 4 * 32;
XELOGAPU("AudioSystem: set context lock %d", context_id);
}
value >>= 1;
}
// Signal the decoder thread to start processing.
worker_fence_.Signal();
} else if (r >= 0x1A80 && r <= 0x1A80 + 9 * 4) {
// Context clear command.
// This will reset the given hardware contexts.
for (int i = 0; value && i < 32; ++i) {
if (value & 1) {
uint32_t context_id = i + (r - 0x1A80) / 4 * 32;
XmaContext& context = context_array_[context_id];
XELOGAPU("AudioSystem: reset context %d", context_id);
context.lock().lock();
auto context_ptr = memory()->TranslateVirtual(context.guest_ptr());
XMA_CONTEXT_DATA data(context_ptr);
context.DiscardPacket();
data.input_buffer_0_valid = 0;
data.input_buffer_1_valid = 0;
data.output_buffer_valid = 0;
data.output_buffer_read_offset = 0;
data.output_buffer_write_offset = 0;
data.Store(context_ptr);
context.lock().unlock();
}
value >>= 1;
}
} else {
value = value;
}
}
} // namespace apu
} // namespace xe

114
src/xenia/apu/xma_decoder.h Normal file
View File

@ -0,0 +1,114 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_APU_XMA_DECODER_H_
#define XENIA_APU_XMA_DECODER_H_
#include <atomic>
#include <mutex>
#include <queue>
#include "xenia/emulator.h"
#include "xenia/xbox.h"
#include "xenia/apu/xma_context.h"
namespace xe {
namespace kernel {
class XHostThread;
} // namespace kernel
} // namespace xe
namespace xe {
namespace apu {
struct XMA_CONTEXT_DATA;
// Emulates the XMA audio decoder hardware: owns the pool of contexts, the
// memory-mapped register file at 0x7FEA0000 and the worker thread that decodes
// kicked contexts.
class XmaDecoder {
 public:
  XmaDecoder(Emulator* emulator);
  virtual ~XmaDecoder();

  Emulator* emulator() const { return emulator_; }
  Memory* memory() const { return memory_; }
  cpu::Processor* processor() const { return processor_; }

  // Maps the registers, allocates the guest context array and starts the
  // worker thread.
  virtual X_STATUS Setup();
  // Stops the worker thread and frees the guest context array.
  virtual void Shutdown();

  // Guest address of the first XMA_CONTEXT_DATA in the context array.
  uint32_t context_array_ptr() const {
    return registers_.context_array_ptr;
  }

  // Returns the guest address of a newly claimed context, or 0 if none free.
  uint32_t AllocateContext();
  void ReleaseContext(uint32_t guest_ptr);
  // Waits for (or, with poll set, only tests) the context being unlocked.
  bool BlockOnContext(uint32_t guest_ptr, bool poll);

  virtual uint64_t ReadRegister(uint32_t addr);
  virtual void WriteRegister(uint32_t addr, uint64_t value);

 protected:
  virtual void Initialize();

 private:
  void WorkerThreadMain();

  void ProcessContext(XmaContext& context, XMA_CONTEXT_DATA& data);
  int PreparePacket(XmaContext& context, XMA_CONTEXT_DATA& data);

  // Adapters handed to Memory::AddVirtualMappedRange; `as` is the `this`
  // pointer registered in Setup().
  static uint64_t MMIOReadRegisterThunk(void* ppc_context, XmaDecoder* as,
                                        uint32_t addr) {
    return as->ReadRegister(addr);
  }
  static void MMIOWriteRegisterThunk(void* ppc_context, XmaDecoder* as,
                                     uint32_t addr, uint64_t value) {
    as->WriteRegister(addr, value);
  }

 protected:
  Emulator* emulator_;
  Memory* memory_;
  cpu::Processor* processor_;

  std::atomic<bool> worker_running_;
  kernel::object_ref<kernel::XHostThread> worker_thread_;
  xe::threading::Fence worker_fence_;

  // Guards context allocation, release and lookup.
  xe::mutex lock_;

  // Stored little endian, accessed through 0x7FEA....
  union {
    struct {
      union {
        struct {
          uint8_t ignored0[0x1800];
          // 1800h; points to guest-space physical block of 320 contexts.
          uint32_t context_array_ptr;
        };
        struct {
          uint8_t ignored1[0x1818];
          // 1818h; current context ID.
          uint32_t current_context;
          // 181Ch; next context ID to process.
          uint32_t next_context;
        };
      };
    } registers_;
    // One 32-bit slot for every aligned offset in the 64KB register window
    // (0x0000-0xFFFC). Sizing this as 0xFFFF / 4 would leave out the slot for
    // offset 0xFFFC and let ReadRegister/WriteRegister index one element past
    // the end.
    uint32_t register_file_[0x10000 / 4];
  };

  static const uint32_t kContextCount = 320;
  XmaContext context_array_[kContextCount];
  std::vector<uint32_t> xma_context_free_list_;
  std::vector<uint32_t> xma_context_used_list_;  // XMA contexts in use
};
} // namespace apu
} // namespace xe
#endif // XENIA_APU_XMA_DECODER_H_

View File

@ -49,10 +49,12 @@ Emulator::~Emulator() {
// Give the systems time to shutdown before we delete them.
graphics_system_->Shutdown();
audio_system_->Shutdown();
xma_decoder_->Shutdown();
input_system_.reset();
graphics_system_.reset();
audio_system_.reset();
xma_decoder_.reset();
kernel_state_.reset();
file_system_.reset();
@ -117,6 +119,8 @@ X_STATUS Emulator::Setup() {
return X_STATUS_NOT_IMPLEMENTED;
}
xma_decoder_ = std::move(std::make_unique<XmaDecoder>(this));
// Initialize the GPU.
graphics_system_ = std::move(xe::gpu::Create(this));
if (!graphics_system_) {
@ -155,6 +159,11 @@ X_STATUS Emulator::Setup() {
return result;
}
result = xma_decoder_->Setup();
if (result) {
return result;
}
// HLE kernel modules.
kernel_state_->LoadKernelModule<kernel::XboxkrnlModule>();
kernel_state_->LoadKernelModule<kernel::XamModule>();

View File

@ -21,6 +21,7 @@
namespace xe {
namespace apu {
class AudioSystem;
class XmaDecoder;
} // namespace apu
namespace cpu {
class ExportResolver;
@ -55,6 +56,7 @@ class Emulator {
cpu::Processor* processor() const { return processor_.get(); }
apu::AudioSystem* audio_system() const { return audio_system_.get(); }
apu::XmaDecoder* xma_decoder() const { return xma_decoder_.get(); }
gpu::GraphicsSystem* graphics_system() const {
return graphics_system_.get();
}
@ -88,6 +90,7 @@ class Emulator {
std::unique_ptr<cpu::Processor> processor_;
std::unique_ptr<apu::AudioSystem> audio_system_;
std::unique_ptr<apu::XmaDecoder> xma_decoder_;
std::unique_ptr<gpu::GraphicsSystem> graphics_system_;
std::unique_ptr<hid::InputSystem> input_system_;

View File

@ -61,8 +61,8 @@ SHIM_CALL XMACreateContext_shim(PPCContext* ppc_context,
XELOGD("XMACreateContext(%.8X)", context_out_ptr);
auto audio_system = kernel_state->emulator()->audio_system();
uint32_t context_ptr = audio_system->AllocateXmaContext();
auto xma_decoder = kernel_state->emulator()->xma_decoder();
uint32_t context_ptr = xma_decoder->AllocateContext();
SHIM_SET_MEM_32(context_out_ptr, context_ptr);
if (!context_ptr) {
SHIM_SET_RETURN_32(X_STATUS_NO_MEMORY);
@ -78,32 +78,32 @@ SHIM_CALL XMAReleaseContext_shim(PPCContext* ppc_context,
XELOGD("XMAReleaseContext(%.8X)", context_ptr);
auto audio_system = kernel_state->emulator()->audio_system();
audio_system->ReleaseXmaContext(context_ptr);
auto xma_decoder = kernel_state->emulator()->xma_decoder();
xma_decoder->ReleaseContext(context_ptr);
SHIM_SET_RETURN_32(0);
}
void StoreXmaContextIndexedRegister(KernelState* kernel_state,
uint32_t base_reg, uint32_t context_ptr) {
auto audio_system = kernel_state->emulator()->audio_system();
uint32_t hw_index = (context_ptr - audio_system->xma_context_array_ptr()) /
sizeof(XMAContextData);
auto xma_decoder = kernel_state->emulator()->xma_decoder();
uint32_t hw_index = (context_ptr - xma_decoder->context_array_ptr()) /
sizeof(XMA_CONTEXT_DATA);
uint32_t reg_num = base_reg + (hw_index >> 5) * 4;
uint32_t reg_value = 1 << (hw_index & 0x1F);
audio_system->WriteRegister(reg_num, xe::byte_swap(reg_value));
xma_decoder->WriteRegister(reg_num, xe::byte_swap(reg_value));
}
struct X_XMA_LOOP_DATA {
struct XMA_LOOP_DATA {
xe::be<uint32_t> loop_start;
xe::be<uint32_t> loop_end;
xe::be<uint8_t> loop_count;
xe::be<uint8_t> loop_subframe_end;
xe::be<uint8_t> loop_subframe_skip;
};
static_assert_size(X_XMA_LOOP_DATA, 12);
static_assert_size(XMA_LOOP_DATA, 12);
struct X_XMA_CONTEXT_INIT {
struct XMA_CONTEXT_INIT {
xe::be<uint32_t> input_buffer_0_ptr;
xe::be<uint32_t> input_buffer_0_packet_count;
xe::be<uint32_t> input_buffer_1_ptr;
@ -115,9 +115,9 @@ struct X_XMA_CONTEXT_INIT {
xe::be<uint32_t> subframe_decode_count;
xe::be<uint32_t> channel_count;
xe::be<uint32_t> sample_rate;
X_XMA_LOOP_DATA loop_data;
XMA_LOOP_DATA loop_data;
};
static_assert_size(X_XMA_CONTEXT_INIT, 56);
static_assert_size(XMA_CONTEXT_INIT, 56);
SHIM_CALL XMAInitializeContext_shim(PPCContext* ppc_context,
KernelState* kernel_state) {
@ -126,10 +126,10 @@ SHIM_CALL XMAInitializeContext_shim(PPCContext* ppc_context,
XELOGD("XMAInitializeContext(%.8X, %.8X)", context_ptr, context_init_ptr);
std::memset(SHIM_MEM_ADDR(context_ptr), 0, sizeof(XMAContextData));
std::memset(SHIM_MEM_ADDR(context_ptr), 0, sizeof(XMA_CONTEXT_DATA));
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
auto context_init = (X_XMA_CONTEXT_INIT*)SHIM_MEM_ADDR(context_init_ptr);
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
auto context_init = (XMA_CONTEXT_INIT*)SHIM_MEM_ADDR(context_init_ptr);
context.input_buffer_0_ptr = context_init->input_buffer_0_ptr;
context.input_buffer_0_packet_count = context_init->input_buffer_0_packet_count;
@ -164,8 +164,8 @@ SHIM_CALL XMASetLoopData_shim(PPCContext* ppc_context,
XELOGD("XMASetLoopData(%.8X, %.8X)", context_ptr, loop_data_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
auto loop_data = (X_XMA_LOOP_DATA*)SHIM_MEM_ADDR(loop_data_ptr);
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
auto loop_data = (XMA_CONTEXT_DATA*)SHIM_MEM_ADDR(loop_data_ptr);
context.loop_start = loop_data->loop_start;
context.loop_end = loop_data->loop_end;
@ -184,7 +184,7 @@ SHIM_CALL XMAGetInputBufferReadOffset_shim(PPCContext* ppc_context,
XELOGD("XMAGetInputBufferReadOffset(%.8X)", context_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
uint32_t result = context.input_buffer_read_offset;
@ -198,7 +198,7 @@ SHIM_CALL XMASetInputBufferReadOffset_shim(PPCContext* ppc_context,
XELOGD("XMASetInputBufferReadOffset(%.8X, %.8X)", context_ptr, value);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
context.input_buffer_read_offset = value;
@ -216,7 +216,7 @@ SHIM_CALL XMASetInputBuffer0_shim(PPCContext* ppc_context,
XELOGD("XMASetInputBuffer0(%.8X, %.8X, %d)", context_ptr, buffer_ptr,
block_count);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
context.input_buffer_0_ptr = buffer_ptr;
context.input_buffer_0_packet_count = block_count;
@ -234,7 +234,7 @@ SHIM_CALL XMAIsInputBuffer0Valid_shim(PPCContext* ppc_context,
XELOGD("XMAIsInputBuffer0Valid(%.8X)", context_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
uint32_t result = context.input_buffer_0_valid;
@ -247,7 +247,7 @@ SHIM_CALL XMASetInputBuffer0Valid_shim(PPCContext* ppc_context,
XELOGD("XMASetInputBuffer0Valid(%.8X)", context_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
context.input_buffer_0_valid = 1;
@ -265,7 +265,7 @@ SHIM_CALL XMASetInputBuffer1_shim(PPCContext* ppc_context,
XELOGD("XMASetInputBuffer1(%.8X, %.8X, %d)", context_ptr, buffer_ptr,
block_count);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
context.input_buffer_1_ptr = buffer_ptr;
context.input_buffer_1_packet_count = block_count;
@ -283,7 +283,7 @@ SHIM_CALL XMAIsInputBuffer1Valid_shim(PPCContext* ppc_context,
XELOGD("XMAIsInputBuffer1Valid(%.8X)", context_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
uint32_t result = context.input_buffer_1_valid;
@ -296,7 +296,7 @@ SHIM_CALL XMASetInputBuffer1Valid_shim(PPCContext* ppc_context,
XELOGD("XMASetInputBuffer1Valid(%.8X)", context_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
context.input_buffer_1_valid = 1;
@ -311,7 +311,7 @@ SHIM_CALL XMAIsOutputBufferValid_shim(PPCContext* ppc_context,
XELOGD("XMAIsOutputBufferValid(%.8X)", context_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
uint32_t result = context.output_buffer_valid;
@ -324,7 +324,7 @@ SHIM_CALL XMASetOutputBufferValid_shim(PPCContext* ppc_context,
XELOGD("XMASetOutputBufferValid(%.8X)", context_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
context.output_buffer_valid = 1;
@ -339,7 +339,7 @@ SHIM_CALL XMAGetOutputBufferReadOffset_shim(PPCContext* ppc_context,
XELOGD("XMAGetOutputBufferReadOffset(%.8X)", context_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
uint32_t result = context.output_buffer_read_offset;
@ -353,7 +353,7 @@ SHIM_CALL XMASetOutputBufferReadOffset_shim(PPCContext* ppc_context,
XELOGD("XMASetOutputBufferReadOffset(%.8X, %.8X)", context_ptr, value);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
context.output_buffer_read_offset = value;
@ -368,7 +368,7 @@ SHIM_CALL XMAGetOutputBufferWriteOffset_shim(PPCContext* ppc_context,
XELOGD("XMAGetOutputBufferWriteOffset(%.8X)", context_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
uint32_t result = context.output_buffer_write_offset;
@ -381,7 +381,7 @@ SHIM_CALL XMAGetPacketMetadata_shim(PPCContext* ppc_context,
XELOGD("XMAGetPacketMetadata(%.8X)", context_ptr);
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
uint32_t result = context.packet_metadata;
@ -408,8 +408,8 @@ SHIM_CALL XMADisableContext_shim(PPCContext* ppc_context,
X_HRESULT result = X_E_SUCCESS;
StoreXmaContextIndexedRegister(kernel_state, 0x1A40, context_ptr);
if (!kernel_state->emulator()->audio_system()->BlockOnXmaContext(context_ptr,
!wait)) {
if (!kernel_state->emulator()->xma_decoder()->BlockOnContext(context_ptr,
!wait)) {
result = X_E_FALSE;
}
@ -423,7 +423,7 @@ SHIM_CALL XMABlockWhileInUse_shim(PPCContext* ppc_context,
XELOGD("XMABlockWhileInUse(%.8X)", context_ptr);
do {
XMAContextData context(SHIM_MEM_ADDR(context_ptr));
XMA_CONTEXT_DATA context(SHIM_MEM_ADDR(context_ptr));
if (!context.input_buffer_0_valid && !context.input_buffer_1_valid) {
break;
}