Speeding up inner loop.

This commit is contained in:
Ben Vanik 2015-05-29 17:05:43 -07:00
parent f54dbe52c2
commit 0a83b49744
2 changed files with 40 additions and 37 deletions

View File

@ -13,7 +13,7 @@
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
extern "C" { extern "C" {
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
} }
// Credit for most of this code goes to: // Credit for most of this code goes to:
@ -22,8 +22,11 @@ extern "C" {
namespace xe { namespace xe {
namespace apu { namespace apu {
AudioDecoder::AudioDecoder() : codec_(nullptr), context_(nullptr), AudioDecoder::AudioDecoder()
decoded_frame_(nullptr), packet_(nullptr) {} : codec_(nullptr),
context_(nullptr),
decoded_frame_(nullptr),
packet_(nullptr) {}
AudioDecoder::~AudioDecoder() { AudioDecoder::~AudioDecoder() {
if (context_) { if (context_) {
@ -87,7 +90,8 @@ int AudioDecoder::Initialize(int bits) {
// Current frame stuff whatever // Current frame stuff whatever
// samples per frame * 2 max channels * output bytes // samples per frame * 2 max channels * output bytes
current_frame_ = new uint8_t[XMAContextData::kSamplesPerFrame * 2 * (bits/8)]; current_frame_ =
new uint8_t[XMAContextData::kSamplesPerFrame * 2 * (bits / 8)];
current_frame_pos_ = 0; current_frame_pos_ = 0;
frame_samples_size_ = 0; frame_samples_size_ = 0;
@ -100,7 +104,7 @@ int AudioDecoder::Initialize(int bits) {
return 0; return 0;
} }
int AudioDecoder::PreparePacket(uint8_t* input, size_t seq_offset, size_t size, int AudioDecoder::PreparePacket(uint8_t *input, size_t seq_offset, size_t size,
int sample_rate, int channels) { int sample_rate, int channels) {
if (size != XMAContextData::kBytesPerBlock) { if (size != XMAContextData::kBytesPerBlock) {
// Invalid packet size! // Invalid packet size!
@ -116,7 +120,7 @@ int AudioDecoder::PreparePacket(uint8_t* input, size_t seq_offset, size_t size,
// Modify the packet header so it's WMAPro compatible // Modify the packet header so it's WMAPro compatible
*((int *)packet_data_) = (((seq_offset & 0x7800) | 0x400) >> 7) | *((int *)packet_data_) = (((seq_offset & 0x7800) | 0x400) >> 7) |
(*((int*)packet_data_) & 0xFFFEFF08); (*((int *)packet_data_) & 0xFFFEFF08);
packet_->data = packet_data_; packet_->data = packet_data_;
packet_->size = XMAContextData::kBytesPerBlock; packet_->size = XMAContextData::kBytesPerBlock;
@ -146,7 +150,8 @@ void AudioDecoder::DiscardPacket() {
} }
} }
int AudioDecoder::DecodePacket(uint8_t* output, size_t output_offset, size_t output_size) { int AudioDecoder::DecodePacket(uint8_t *output, size_t output_offset,
size_t output_size) {
size_t to_copy = 0; size_t to_copy = 0;
size_t original_offset = output_offset; size_t original_offset = output_offset;
uint32_t sample_size = bits_ / 8; uint32_t sample_size = bits_ / 8;
@ -154,7 +159,8 @@ int AudioDecoder::DecodePacket(uint8_t* output, size_t output_offset, size_t out
// We're holding onto an already-decoded frame. Copy it out. // We're holding onto an already-decoded frame. Copy it out.
if (current_frame_pos_ != frame_samples_size_) { if (current_frame_pos_ != frame_samples_size_) {
to_copy = std::min(output_size, frame_samples_size_ - current_frame_pos_); to_copy = std::min(output_size, frame_samples_size_ - current_frame_pos_);
memcpy(output + output_offset, current_frame_ + current_frame_pos_, to_copy); memcpy(output + output_offset, current_frame_ + current_frame_pos_,
to_copy);
current_frame_pos_ += to_copy; current_frame_pos_ += to_copy;
output_size -= to_copy; output_size -= to_copy;
@ -165,7 +171,8 @@ int AudioDecoder::DecodePacket(uint8_t* output, size_t output_offset, size_t out
int got_frame = 0; int got_frame = 0;
// Decode the current frame // Decode the current frame
int len = avcodec_decode_audio4(context_, decoded_frame_, &got_frame, packet_); int len =
avcodec_decode_audio4(context_, decoded_frame_, &got_frame, packet_);
if (len < 0) { if (len < 0) {
// Error in codec (bad sample rate or something) // Error in codec (bad sample rate or something)
return len; return len;
@ -188,37 +195,36 @@ int AudioDecoder::DecodePacket(uint8_t* output, size_t output_offset, size_t out
// Check the returned buffer size // Check the returned buffer size
if (av_samples_get_buffer_size(NULL, context_->channels, if (av_samples_get_buffer_size(NULL, context_->channels,
decoded_frame_->nb_samples, decoded_frame_->nb_samples,
context_->sample_fmt, 1) context_->sample_fmt, 1) !=
!= context_->channels * decoded_frame_->nb_samples * sizeof(float)) { context_->channels * decoded_frame_->nb_samples * sizeof(float)) {
return -4; return -4;
} }
// Output sample array // Output sample array
float* sample_array = (float *)decoded_frame_->data[0]; float *sample_array = (float *)decoded_frame_->data[0];
// Loop through every sample, convert and drop it into the output array // Loop through every sample, convert and drop it into the output array.
if (sample_size == 2) {
for (int i = 0; i < decoded_frame_->nb_samples; i++) { for (int i = 0; i < decoded_frame_->nb_samples; i++) {
// Raw sample should be within [-1, 1] // Raw sample should be within [-1, 1].
float fRawSample = sample_array[i];
// Clamp it, just in case. // Clamp it, just in case.
fRawSample = std::min( 1.f, fRawSample); float raw_sample = xe::saturate(sample_array[i]);
fRawSample = std::max(-1.f, fRawSample);
float fScaledSample = fRawSample * (1 << (bits_ - 1)); // Convert the sample and output it in big endian.
float scaled_sample = raw_sample * (1 << (bits_ - 1));
// Convert the sample and output it in big endian int sample = static_cast<int>(scaled_sample);
int sample = (int)fScaledSample; xe::store_and_swap<uint16_t>(&current_frame_[i * 2],
for (int32_t j = sample_size-1; j >= 0; j--) { sample & 0xFFFF);
current_frame_[i * sample_size + j] = sample & 0xFF;
sample >>= 8;
} }
} else {
// 1 byte? 4 bytes?
assert_unhandled_case(sample_size);
} }
current_frame_pos_ = 0; current_frame_pos_ = 0;
// Total size of the frame's samples // Total size of the frame's samples
frame_samples_size_ = context_->channels * decoded_frame_->nb_samples frame_samples_size_ =
* sample_size; context_->channels * decoded_frame_->nb_samples * sample_size;
to_copy = std::min(output_size, (size_t)(frame_samples_size_)); to_copy = std::min(output_size, (size_t)(frame_samples_size_));
std::memcpy(output + output_offset, current_frame_, to_copy); std::memcpy(output + output_offset, current_frame_, to_copy);
@ -233,6 +239,5 @@ int AudioDecoder::DecodePacket(uint8_t* output, size_t output_offset, size_t out
return (int)(output_offset - original_offset); return (int)(output_offset - original_offset);
} }
} // namespace apu } // namespace apu
} // namespace xe } // namespace xe

View File

@ -341,8 +341,6 @@ void AudioSystem::ProcessXmaContext(XMAContext& context, XMAContextData& data) {
: nullptr; : nullptr;
uint8_t* out = memory()->TranslatePhysical(data.output_buffer_ptr); uint8_t* out = memory()->TranslatePhysical(data.output_buffer_ptr);
assert(!in1);
// What I see: // What I see:
// XMA outputs 2 bytes per sample // XMA outputs 2 bytes per sample
// 512 samples per frame (128 per subframe) // 512 samples per frame (128 per subframe)