Audio now works a bit better, but is still broken :/
This commit is contained in:
parent
a93325434a
commit
e419d314b3
|
@ -181,7 +181,7 @@ void AudioSystem::WorkerThreadMain() {
|
|||
|
||||
void AudioSystem::DecoderThreadMain() {
|
||||
while (decoder_running_) {
|
||||
// Wait for the fence
|
||||
// Wait for a kick from WriteRegister.
|
||||
decoder_fence_.Wait();
|
||||
|
||||
// Check to see if we're supposed to exit
|
||||
|
@ -192,128 +192,14 @@ void AudioSystem::DecoderThreadMain() {
|
|||
// Okay, let's loop through XMA contexts to find ones we need to decode!
|
||||
for (uint32_t n = 0; n < kXmaContextCount; n++) {
|
||||
XMAContext& context = xma_context_array_[n];
|
||||
if (!context.lock.try_lock()) {
|
||||
// Someone else has the lock.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip unused contexts
|
||||
if (!context.in_use) {
|
||||
if (context.in_use) {
|
||||
context.lock.lock();
|
||||
auto context_ptr = memory()->TranslateVirtual(context.guest_ptr);
|
||||
XMAContextData data(context_ptr);
|
||||
ProcessXmaContext(context, data);
|
||||
data.Store(context_ptr);
|
||||
context.lock.unlock();
|
||||
continue;
|
||||
}
|
||||
|
||||
uint8_t* ptr = memory()->TranslatePhysical(context.guest_ptr);
|
||||
auto data = XMAContextData(ptr);
|
||||
|
||||
if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
|
||||
// A buffer is valid. Run the decoder!
|
||||
|
||||
// Reset valid flags
|
||||
data.input_buffer_0_valid = 0;
|
||||
data.input_buffer_1_valid = 0;
|
||||
data.output_buffer_valid = 0;
|
||||
|
||||
// Translate pointers for future use.
|
||||
auto in0 = memory()->TranslatePhysical(data.input_buffer_0_ptr);
|
||||
auto in1 = memory()->TranslatePhysical(data.input_buffer_1_ptr);
|
||||
auto out = memory()->TranslatePhysical(data.output_buffer_ptr);
|
||||
|
||||
// What I see:
|
||||
// XMA outputs 2 bytes per sample
|
||||
// 512 samples per frame (128 per subframe)
|
||||
// Max output size is data.output_buffer_block_count * 256
|
||||
|
||||
// This decoder is fed packets (max 4095 per buffer)
|
||||
// Packets contain "some" frames
|
||||
// 32bit header (big endian)
|
||||
|
||||
// Frames are the smallest thing the SPUs can decode.
|
||||
// They usually can span packets (libav handles this)
|
||||
|
||||
// Sample rates (data.sample_rate):
|
||||
// 0 - 24 kHz ?
|
||||
// 1 - 32 kHz
|
||||
// 2 - 44.1 kHz ?
|
||||
// 3 - 48 kHz ?
|
||||
|
||||
// SPUs also support stereo decoding. (data.is_stereo)
|
||||
int retries_remaining = 2;
|
||||
while (retries_remaining) {
|
||||
// Initial check - see if we've finished with the input
|
||||
// TODO - Probably need to move this, I think it might skip the very
|
||||
// last packet (see the call to PreparePacket)
|
||||
size_t input_size = (data.input_buffer_0_block_count +
|
||||
data.input_buffer_1_block_count) *
|
||||
2048;
|
||||
size_t input_offset = (data.input_buffer_read_offset / 8 - 4);
|
||||
size_t input_remaining = input_size - input_offset;
|
||||
if (input_offset >= input_size) {
|
||||
// We're finished. Break.
|
||||
break;
|
||||
}
|
||||
|
||||
// Now check the output buffer.
|
||||
size_t output_size = data.output_buffer_block_count * 256;
|
||||
size_t output_offset = data.output_buffer_write_offset * 256;
|
||||
size_t output_remaining = output_size - output_offset;
|
||||
if (output_remaining == 0) {
|
||||
// Can't write any more data. Break.
|
||||
// The game will kick us again with a new output buffer later.
|
||||
break;
|
||||
}
|
||||
|
||||
// This'll copy audio samples into the output buffer.
|
||||
// The samples need to be 2 bytes long!
|
||||
// Copies one frame at a time, so keep calling this until size == 0
|
||||
int read = context.decoder->DecodePacket(out, output_offset,
|
||||
output_remaining);
|
||||
if (read < 0) {
|
||||
// Sometimes the decoder will fail on a packet. I think it's
|
||||
// looking for cross-packet frames and failing. If you run it again
|
||||
// on the same packet it'll work though.
|
||||
XELOGAPU("APU failed to decode packet (returned %.8X)", -read);
|
||||
--retries_remaining;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (read == 0) {
|
||||
// Select sample rate.
|
||||
int sample_rate = 0;
|
||||
if (data.sample_rate == 0) {
|
||||
sample_rate = 24000;
|
||||
} else if (data.sample_rate == 1) {
|
||||
sample_rate = 32000;
|
||||
} else if (data.sample_rate == 2) {
|
||||
sample_rate = 44100;
|
||||
} else if (data.sample_rate == 3) {
|
||||
sample_rate = 48000;
|
||||
}
|
||||
|
||||
// Channels
|
||||
int channels = 1;
|
||||
if (data.is_stereo == 1) {
|
||||
channels = 2;
|
||||
}
|
||||
|
||||
// New packet time.
|
||||
// TODO: Select input buffer 1 if necessary.
|
||||
auto packet = in0 + input_offset;
|
||||
context.decoder->PreparePacket(packet, 2048, sample_rate, channels);
|
||||
input_offset += 2048;
|
||||
}
|
||||
|
||||
output_offset += read;
|
||||
|
||||
// Copy the variables we changed back to the context.
|
||||
data.input_buffer_read_offset = (input_offset + 4) * 8;
|
||||
data.output_buffer_write_offset = output_offset / 256;
|
||||
}
|
||||
|
||||
data.Store(ptr);
|
||||
}
|
||||
|
||||
context.lock.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -437,6 +323,140 @@ void AudioSystem::UnregisterClient(size_t index) {
|
|||
ResetEvent(client_wait_handles_[index]);
|
||||
}
|
||||
|
||||
void AudioSystem::ProcessXmaContext(XMAContext& context, XMAContextData& data) {
|
||||
if (!context.in_use) {
|
||||
// Skip unused contexts.
|
||||
return;
|
||||
}
|
||||
|
||||
SCOPE_profile_cpu_f("apu");
|
||||
|
||||
// Translate pointers for future use.
|
||||
uint8_t* in0 = data.input_buffer_0_valid
|
||||
? memory()->TranslatePhysical(data.input_buffer_0_ptr)
|
||||
: nullptr;
|
||||
uint8_t* in1 = data.input_buffer_1_valid
|
||||
? memory()->TranslatePhysical(data.input_buffer_1_ptr)
|
||||
: nullptr;
|
||||
uint8_t* out = memory()->TranslatePhysical(data.output_buffer_ptr);
|
||||
|
||||
// What I see:
|
||||
// XMA outputs 2 bytes per sample
|
||||
// 512 samples per frame (128 per subframe)
|
||||
// Max output size is data.output_buffer_block_count * 256
|
||||
|
||||
// This decoder is fed packets (max 4095 per buffer)
|
||||
// Packets contain "some" frames
|
||||
// 32bit header (big endian)
|
||||
|
||||
// Frames are the smallest thing the SPUs can decode.
|
||||
// They usually can span packets (libav handles this)
|
||||
|
||||
// Sample rates (data.sample_rate):
|
||||
// 0 - 24 kHz ?
|
||||
// 1 - 32 kHz
|
||||
// 2 - 44.1 kHz ?
|
||||
// 3 - 48 kHz ?
|
||||
|
||||
// SPUs also support stereo decoding. (data.is_stereo)
|
||||
while (data.output_buffer_valid) {
|
||||
// Check the output buffer - we cannot decode anything else if it's
|
||||
// unavailable.
|
||||
// Output buffers are in frames.
|
||||
uint32_t output_size_bytes = data.output_buffer_block_count * 256;
|
||||
uint32_t output_offset_bytes = data.output_buffer_write_offset * 256;
|
||||
uint32_t output_remaining_bytes = output_size_bytes - output_offset_bytes;
|
||||
if (!output_remaining_bytes) {
|
||||
// Can't write any more data. Break.
|
||||
// The game will kick us again with a new output buffer later.
|
||||
data.output_buffer_valid = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// This'll copy audio samples into the output buffer.
|
||||
// The samples need to be 2 bytes long!
|
||||
// Copies one frame at a time, so keep calling this until size == 0
|
||||
int read_bytes = 0;
|
||||
int decode_attempts_remaining = 3;
|
||||
while (decode_attempts_remaining) {
|
||||
read_bytes = context.decoder->DecodePacket(out, output_offset_bytes,
|
||||
output_remaining_bytes);
|
||||
if (read_bytes >= 0) {
|
||||
// Ok.
|
||||
break;
|
||||
} else {
|
||||
// Sometimes the decoder will fail on a packet. I think it's
|
||||
// looking for cross-packet frames and failing. If you run it again
|
||||
// on the same packet it'll work though.
|
||||
XELOGAPU("APU failed to decode packet (returned %.8X)", -read_bytes);
|
||||
--decode_attempts_remaining;
|
||||
}
|
||||
}
|
||||
if (!decode_attempts_remaining) {
|
||||
// Failed out.
|
||||
if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
|
||||
// There's new data available - maybe we'll be ok if we decode it?
|
||||
read_bytes = 0;
|
||||
context.decoder->DiscardPacket();
|
||||
} else {
|
||||
// No data and hosed - bail.
|
||||
break;
|
||||
}
|
||||
}
|
||||
data.output_buffer_write_offset += uint32_t(read_bytes) / 256;
|
||||
|
||||
// If we need more data and the input buffers have it, grab it.
|
||||
if (read_bytes) {
|
||||
// Still outputting.
|
||||
continue;
|
||||
} else if (data.input_buffer_0_valid || data.input_buffer_1_valid) {
|
||||
// Done with previous packet, so grab a new one.
|
||||
int sample_rate = 0;
|
||||
if (data.sample_rate == 0) {
|
||||
sample_rate = 24000;
|
||||
} else if (data.sample_rate == 1) {
|
||||
sample_rate = 32000;
|
||||
} else if (data.sample_rate == 2) {
|
||||
sample_rate = 44100;
|
||||
} else if (data.sample_rate == 3) {
|
||||
sample_rate = 48000;
|
||||
}
|
||||
int channels = data.is_stereo ? 2 : 1;
|
||||
|
||||
// See if we've finished with the input
|
||||
// TODO - Probably need to move this, I think it might skip the very
|
||||
// last packet (see the call to PreparePacket)
|
||||
// Block count is in frames, so expand by
|
||||
// samples_per_frame*bytes_per_sample*bits_per_byte.
|
||||
uint32_t input_size_bytes =
|
||||
(data.input_buffer_0_block_count + data.input_buffer_1_block_count) *
|
||||
2048;
|
||||
// Input read offset is in bits. Typically starts at 32 (4 bytes).
|
||||
uint32_t input_offset_bytes =
|
||||
(data.input_buffer_read_offset & ~0x7FF) / 8;
|
||||
if (input_offset_bytes < input_size_bytes) {
|
||||
// Still have data to read.
|
||||
// TODO: Select input buffer 1 if necessary.
|
||||
auto packet = in0 + input_offset_bytes;
|
||||
context.decoder->PreparePacket(packet, 2048, sample_rate, channels);
|
||||
data.input_buffer_read_offset += 2048 * 8;
|
||||
if (input_offset_bytes + 2048 >= input_size_bytes) {
|
||||
// Used the last of the data.
|
||||
data.input_buffer_0_valid = 0;
|
||||
data.input_buffer_1_valid = 0;
|
||||
}
|
||||
} else {
|
||||
// No more data available (for now).
|
||||
data.input_buffer_0_valid = 0;
|
||||
data.input_buffer_1_valid = 0;
|
||||
}
|
||||
} else {
|
||||
// Decoder is out of data and there's no more to give.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// free60 may be useful here, however it looks like it's using a different
|
||||
// piece of hardware:
|
||||
// https://github.com/Free60Project/libxenon/blob/master/libxenon/drivers/xenon_sound/sound.c
|
||||
|
@ -483,7 +503,7 @@ void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) {
|
|||
// Basically, this kicks the SPU and says "hey, decode that audio!"
|
||||
// XMAEnableContext
|
||||
|
||||
// The context ID is a bit in the range of the entire context array
|
||||
// The context ID is a bit in the range of the entire context array.
|
||||
for (int i = 0; value && i < 32; ++i) {
|
||||
if (value & 1) {
|
||||
uint32_t context_id = i + (r - 0x1940) / 4 * 32;
|
||||
|
@ -495,10 +515,10 @@ void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) {
|
|||
|
||||
XELOGAPU(
|
||||
"AudioSystem: kicking context %d (%d/%d bytes)", context_id,
|
||||
data.input_buffer_read_offset,
|
||||
data.input_buffer_read_offset / 8,
|
||||
data.input_buffer_0_block_count * XMAContextData::kBytesPerBlock);
|
||||
|
||||
// Reset valid flags so our audio decoder knows to process this one
|
||||
// Reset valid flags so our audio decoder knows to process this one.
|
||||
data.input_buffer_0_valid = data.input_buffer_0_ptr != 0;
|
||||
data.input_buffer_1_valid = data.input_buffer_1_ptr != 0;
|
||||
data.output_buffer_write_offset = 0;
|
||||
|
@ -519,26 +539,34 @@ void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) {
|
|||
if (value & 1) {
|
||||
uint32_t context_id = i + (r - 0x1A40) / 4 * 32;
|
||||
XELOGAPU("AudioSystem: set context lock %d", context_id);
|
||||
|
||||
// TODO: Find the correct way to lock/unlock this.
|
||||
// I thought we could lock it here, unlock it in the kick but that
|
||||
// doesn't seem to work
|
||||
XMAContext& context = xma_context_array_[context_id];
|
||||
}
|
||||
value >>= 1;
|
||||
}
|
||||
|
||||
// Signal the decoder thread to start processing.
|
||||
decoder_fence_.Signal();
|
||||
} else if (r >= 0x1A80 && r <= 0x1A80 + 9 * 4) {
|
||||
// Context clear command.
|
||||
// This will reset the given hardware contexts.
|
||||
for (int i = 0; value && i < 32; ++i) {
|
||||
if (value & 1) {
|
||||
uint32_t context_id = i + (r - 0x1A80) / 4 * 32;
|
||||
XMAContext& context = xma_context_array_[context_id];
|
||||
XELOGAPU("AudioSystem: reset context %d", context_id);
|
||||
|
||||
// TODO(benvanik): something?
|
||||
uint32_t guest_ptr =
|
||||
registers_.xma_context_array_ptr + context_id * kXmaContextSize;
|
||||
auto context_ptr = memory()->TranslateVirtual(guest_ptr);
|
||||
context.lock.lock();
|
||||
auto context_ptr = memory()->TranslateVirtual(context.guest_ptr);
|
||||
XMAContextData data(context_ptr);
|
||||
|
||||
context.decoder->DiscardPacket();
|
||||
data.input_buffer_0_valid = 0;
|
||||
data.input_buffer_1_valid = 0;
|
||||
data.output_buffer_valid = 0;
|
||||
|
||||
data.Store(context_ptr);
|
||||
context.lock.unlock();
|
||||
}
|
||||
value >>= 1;
|
||||
}
|
||||
|
|
|
@ -111,6 +111,9 @@ struct XMAContextData {
|
|||
static_assert(sizeof(XMAContextData) == 4 * 10, "Must be packed");
|
||||
|
||||
class AudioSystem {
|
||||
protected:
|
||||
struct XMAContext;
|
||||
|
||||
public:
|
||||
virtual ~AudioSystem();
|
||||
|
||||
|
@ -147,6 +150,8 @@ class AudioSystem {
|
|||
void WorkerThreadMain();
|
||||
void DecoderThreadMain();
|
||||
|
||||
void ProcessXmaContext(XMAContext& context, XMAContextData& data);
|
||||
|
||||
static uint64_t MMIOReadRegisterThunk(AudioSystem* as, uint32_t addr) {
|
||||
return as->ReadRegister(addr);
|
||||
}
|
||||
|
|
|
@ -889,21 +889,23 @@ SHIM_CALL KeWaitForMultipleObjects_shim(PPCContext* ppc_state,
|
|||
|
||||
X_STATUS result = X_STATUS_SUCCESS;
|
||||
|
||||
std::vector<object_ref<XObject>> objects(count);
|
||||
std::vector<object_ref<XObject>> objects;
|
||||
for (uint32_t n = 0; n < count; n++) {
|
||||
uint32_t object_ptr_ptr = SHIM_MEM_32(objects_ptr + n * 4);
|
||||
void* object_ptr = SHIM_MEM_ADDR(object_ptr_ptr);
|
||||
objects[n] = XObject::GetNativeObject<XObject>(state, object_ptr);
|
||||
if (!objects[n]) {
|
||||
auto object_ref = XObject::GetNativeObject<XObject>(state, object_ptr);
|
||||
if (!object_ref) {
|
||||
SHIM_SET_RETURN_32(X_STATUS_INVALID_PARAMETER);
|
||||
return;
|
||||
}
|
||||
objects.push_back(std::move(object_ref));
|
||||
}
|
||||
|
||||
uint64_t timeout = timeout_ptr ? SHIM_MEM_64(timeout_ptr) : 0;
|
||||
result = XObject::WaitMultiple(
|
||||
count, reinterpret_cast<XObject**>(objects.data()), wait_type,
|
||||
wait_reason, processor_mode, alertable, timeout_ptr ? &timeout : nullptr);
|
||||
result = XObject::WaitMultiple(uint32_t(objects.size()),
|
||||
reinterpret_cast<XObject**>(objects.data()),
|
||||
wait_type, wait_reason, processor_mode,
|
||||
alertable, timeout_ptr ? &timeout : nullptr);
|
||||
|
||||
SHIM_SET_RETURN_32(result);
|
||||
}
|
||||
|
|
|
@ -146,7 +146,7 @@ X_STATUS XObject::WaitMultiple(uint32_t count, XObject** objects,
|
|||
uint32_t wait_type, uint32_t wait_reason,
|
||||
uint32_t processor_mode, uint32_t alertable,
|
||||
uint64_t* opt_timeout) {
|
||||
void** wait_handles = (void**)alloca(sizeof(void*) * count);
|
||||
HANDLE* wait_handles = (HANDLE*)alloca(sizeof(HANDLE) * count);
|
||||
for (uint32_t n = 0; n < count; n++) {
|
||||
wait_handles[n] = objects[n]->GetWaitHandle();
|
||||
assert_not_null(wait_handles[n]);
|
||||
|
|
Loading…
Reference in New Issue